# Deploying a ML model as web service on Kubernetes
This notebook shows the steps for registering a model, creating an image, and creating a Kubernetes Deployment for a service.

In [None]:
import azureml.core

# Show the installed Azure ML SDK version so the run can be reproduced later.
print(azureml.core.VERSION)

# Get workspace
Create an Azure Machine Learning workspace on portal.azure.com before running this notebook. Once it is created, download `config.json` from your workspace and place it in the same folder as this notebook, saved as `ml_workspace_config.json` (the file name the next cell loads). ![Capture_withoverlay.JPG](pics/conf_file_download.JPG)


In [None]:
from azureml.core.workspace import Workspace

# Connect to the workspace described by the local configuration file.
ws = Workspace.from_config(path="ml_workspace_config.json")

# Echo the identifying details of the workspace, one per line.
print(
    "\n".join(
        str(detail)
        for detail in (
            ws.name,
            ws.resource_group,
            ws.location,
            ws.subscription_id,
        )
    )
)

# Download the model

Prior to registering the model, you should have a TensorFlow [Saved Model](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md) in the `resnet50` directory. This cell will download a [pretrained resnet50](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz) and unpack it to that directory.

In [None]:
import os
import requests
import shutil
import tarfile
import tempfile

from io import BytesIO

# Location of the pretrained ResNet50 SavedModel archive.
model_url = "http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz"

# Folder inside the archive that is copied out, and where it is copied to.
archive_prefix = "./resnet_v1_fp32_savedmodel_NCHW_jpg/1538686758/"
target_folder = "resnet50"

# The download is skipped when the target folder already exists, so re-running
# this cell is cheap.
if not os.path.exists(target_folder):
    print(f'target_folder "{target_folder}" does not exist.')
    print(f'Downloading it from "{model_url}"...')
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(model_url, timeout=60)
    # Fail here on an HTTP error instead of handing an error page to tarfile.
    response.raise_for_status()
    # Both the archive and the scratch folder are released when the block ends.
    with tarfile.open(
        fileobj=BytesIO(response.content)
    ) as archive, tempfile.TemporaryDirectory() as temp_folder:
        print(f'extracting in "{temp_folder}"...')
        # NOTE(review): extractall() trusts the member paths in the archive;
        # only use it with archives from a source you trust.
        archive.extractall(temp_folder)
        print(f'copyint to "{target_folder}"...')
        shutil.copytree(
            os.path.join(temp_folder, archive_prefix), target_folder
        )

# Register the model
Register an existing trained model, add description and tags.

In [None]:
from azureml.core.model import Model

# Upload the local `resnet50` directory and register it in the workspace
# under the name `resnet50`, together with a description and tags.
model = Model.register(
    workspace=ws,
    model_name="resnet50",
    model_path="resnet50",
    description="Image classification trained on Imagenet Dataset",
    tags=dict(area="Image classification", type="classification"),
)

# Show what was registered, including the version of the registration.
print(model.name, model.description, model.version)

# Deploy the model as a web service

We begin by writing a score.py file that will be invoked by the web service call. The init() function is called once when the container is started so we load the model using the Tensorflow session. The run() function is called when the webservice is invoked for inferencing. After running the code below you should see a score.py file in the same folder as this notebook.

In [None]:
%%writefile score.py
import tensorflow as tf
import numpy as np
import json
import os
from azureml.contrib.services.aml_request import AMLRequest, rawhttp
from azureml.contrib.services.aml_response import AMLResponse


def init():
    """Create the TensorFlow session and load the SavedModel into it.

    Sets three module-level globals that score() reads:
      session     -- the tf.Session the model is loaded into
      input_name  -- name of the single input tensor of "serving_default"
      output_name -- list with the names of all its output tensors

    Raises:
        RuntimeError: if the AZUREML_MODEL_DIR environment variable is not set.
        ValueError: if "serving_default" does not have exactly one input.
    """
    global session
    global input_name
    global output_name

    print(f"getting tf.Session()...")
    session = tf.Session()
    print(f"got tf.Session()")

    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    # For multiple models, it points to the folder containing all deployed models (./azureml-models)
    model_dir = os.getenv("AZUREML_MODEL_DIR")
    if model_dir is None:
        # Without this check os.path.join(None, ...) raises an opaque TypeError.
        raise RuntimeError(
            "AZUREML_MODEL_DIR is not set; when running score.py outside a "
            "deployment, set it to the folder that contains 'resnet50'"
        )
    model_path = os.path.join(model_dir, "resnet50")
    print(f'model_path: "{model_path}"')

    meta_graph = tf.saved_model.loader.load(session, ["serve"], model_path)
    signature = meta_graph.signature_def["serving_default"]
    # Exactly one input is required: more than one is unsupported, and zero
    # would otherwise surface as an IndexError on the lookup below.
    if len(signature.inputs) != 1:
        raise ValueError("This score.py only supports one input")
    input_name = [tensor.name for tensor in signature.inputs.values()][0]
    output_name = [tensor.name for tensor in signature.outputs.values()]


@rawhttp
def run(request):
    """Handle one raw HTTP request sent to the web service.

    POST: the request body is passed to score() and the result is returned
    with status 200. GET: answered with status 405. Any other method:
    answered with "bad request" and status 500.
    """
    method = request.method
    if method == "GET":
        return AMLResponse(str.encode("GET is not supported"), 405)
    if method != "POST":
        return AMLResponse("bad request", 500)

    # POST: score the raw request body.
    payload = request.get_data(False)
    return AMLResponse(score(payload), 200)


def score(data):
    """Run the loaded model on one payload and return a JSON string.

    `data` is fed to the input tensor as a one-element batch. All output
    tensors named in `output_name` are fetched; the second fetched output is
    what gets serialized.
    """
    print(f"doing score()...")
    fetched = session.run(output_name, {input_name: [data]})
    # NOTE(review): which model output sits at index 1 depends on the order of
    # the names collected in init() -- confirm this is the intended one.
    selected = fetched[1]
    return json.dumps(selected.tolist())


# Local smoke test: load the model and score a single image file from disk.
if __name__ == "__main__":
    init()
    with open("test_image.jpg", "rb") as image_file:
        image_bytes = image_file.read()
    print(score(image_bytes))
    print(f"done")

Now create the deployment configuration objects. We mention `score.py`, which we just defined.

In [None]:
# Set the web service configuration (using default here)
from azureml.core.model import InferenceConfig

# from azureml.core.webservice import AksWebservice
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.environment import Environment, DEFAULT_GPU_IMAGE

# Python packages the scoring container needs: TensorFlow (GPU build) and
# numpy from conda, the Azure ML serving packages from pip.
conda_deps = CondaDependencies.create(
    conda_packages=["tensorflow-gpu==1.12.0", "numpy"],
    pip_packages=["azureml-contrib-services", "azureml-defaults"],
)

env = Environment("deploytoedgeenv")
# Please see [Azure ML Containers repository](https://github.com/Azure/AzureML-Containers#featured-tags)
# for open-sourced GPU base images.
env.docker.base_image = DEFAULT_GPU_IMAGE
env.python.conda_dependencies = conda_deps

# Tie the entry script written above to the environment it runs in.
inference_config = InferenceConfig(environment=env, entry_script="score.py")

## Create container image in Azure ML
Use Azure ML to create the container image. This step will likely take a few minutes.

In [None]:
# Name and tag (label) of the container image; both are passed to
# Model.package() and reused later to compose the image location.
imagename = "tfgpuk8s"
imagelabel = "1.0"

In [None]:
# Builds an image in ACR and waits until the build has finished.

package = Model.package(
    ws,
    [model],
    inference_config=inference_config,
    image_name=imagename,
    image_label=imagelabel,
)
package.wait_for_creation(show_output=True)

# get_container_registry is a method: it has to be called, otherwise the
# bound-method repr is printed instead of the registry. Only the address is
# shown so that registry credentials do not end up in the notebook output.
print("ACR:", package.get_container_registry().address)
print("Image:", package.location)

## Deploy container to Azure Container Registry

In [None]:
# The image location has the form "<registry host>/<image>:<tag>": the part
# before the first "/" is the registry host, and the part of the host before
# the first "." is the registry name.
acr_name = package.location.partition("/")[0]
reg_name = acr_name.partition(".")[0]
subscription_id = ws.subscription_id

print("acr_name: {}".format(acr_name))
print("subscription_id: {}".format(subscription_id))

# TODO: Derive image_location through code.
image_location = f"{acr_name}/{imagename}:{imagelabel}"

print("image_location: {}".format(image_location))

# Fetch username, password of ACR.
from azure.mgmt.containerregistry import ContainerRegistryManagementClient
from azure.mgmt import containerregistry

# NOTE(review): ws._auth is a private attribute of the workspace object.
client = ContainerRegistryManagementClient(ws._auth, subscription_id)
result = client.registries.list_credentials(
    ws.resource_group,
    reg_name,
    custom_headers=None,
    raw=False,
)

# Use the first of the passwords returned for the registry.
myusername = result.username
mypassword = result.passwords[0].value

# Do not commit your credentials to this notebook's source repository.
# print("using username \"" + myusername + "\"")
# print("using password \"" + mypassword + "\"")

## Preparing Deployment file for AS Edge or another Kubernetes cluster.

Below is the template for our Kubernetes deployments. We create it within this notebook for visibility, for more complex deployments you can make it part of source control repository.

We will apply `image_location` that we composed to __REGISTRY_IMAGE_LOCATION.

The value for the new macro definition, __REGISTRY_SECRET_NAME, is the result of the authentication process you need to complete to be able to connect to a private container registry (Docker image repository).

We will do steps described in https://github.com/Azure-Samples/azure-intelligent-edge-patterns/tree/master/Research/deploying-model-on-k8s , and you are welcome to familiarize yourself with the subject at https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/


## Validating kubectl on AS Edge

If you created your cluster on Azure Stack Edge using aks-engine, you will have `kubectl`. You can check the version:

    $ kubectl version --client
    Client Version: version.Info{Major:"1", Minor:"19", GitVersion:"v1.19.2",
    GitCommit:"f5743093fd1c663cb0cbc89748f730662345d44d", GitTreeState:"clean",
    BuildDate:"2020-09-16T13:41:02Z", GoVersion:"go1.15", Compiler:"gc", Platform:"linux/amd64"}
    
For more information, visit https://docs.microsoft.com/en-us/azure/databox-online/azure-stack-edge-j-series-deploy-stateless-application-kubernetes


## Validating kubectl on a generic Kubernetes cluster

You also need `kubectl`. If the following command fails, see https://kubernetes.io/docs/tasks/tools/install-kubectl/

    $ kubectl version --client
    Client Version: version.Info{Major:"1", Minor:"19", GitVersion:"v1.19.2",
    GitCommit:"f5743093fd1c663cb0cbc89748f730662345d44d", GitTreeState:"clean",
    BuildDate:"2020-09-16T13:41:02Z", GoVersion:"go1.15", Compiler:"gc", Platform:"linux/amd64"}

A simple way to install it, is using `snap`:

    $ snap install kubectl --classic


In [None]:
# Check that kubectl is installed in this environment by printing the
# client version (no cluster connection is needed for this).
!kubectl version --client

**IMPORTANT: Your `kubectl` must be able to access the cluster. The cluster is usually defined by a `kubeconfig` file, which contains the authentication tokens and is usually located at `~/.kube/config` on the master node of your Kubernetes cluster. Copy its content to the machine where you installed `kubectl`.**

Here is how you can look at your Kubernetes cluster nodes (once you copied your kubeconfig file):

```
$ kubectl get nodes
NAME             STATUS   ROLES    AGE    VERSION
docker-desktop   Ready    master   120d   v1.16.6-beta.0
```

In [None]:
# List the cluster nodes; this only works once kubectl can reach the cluster.
!kubectl get nodes

Now you can create your private registry access secret, we will call it `secret4acr2infer`.

The simplest way is to create it by passing the credentials on the command line, like so:

```
$ kubectl create secret docker-registry secret4acr2infer --docker-server=<your-registry-server> --docker-username=<your-name> --docker-password=<your-pword> --docker-email=<your-email>
```

An alternative to that, is to login using Docker and create a secret based on the `~/.docker/config.json`:

```
$ docker login -u <user id we had before> -p <what we had before> <your account id>.azurecr.io
Login Succeeded
$ kubectl create secret generic secret4acr2infer \
    --from-file=.dockerconfigjson=/home/azureuser/.docker/config.json \
    --type=kubernetes.io/dockerconfigjson
secret/secret4acr2infer created
```    
    
For more information, please see [Pull an Image from a Private Registry](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/)

In [None]:
# You can define your other settings to normalize the parameters across the notebook.
# This is the name of the Kubernetes registry secret: it is passed to
# `kubectl create secret` and substituted for __REGISTRY_SECRET_NAME in the
# deployment template.
secret4acr2infer = "secret4acr2infer"

In [None]:
# Create the image-pull secret from the registry address and the credentials
# held in the notebook variables acr_name, myusername and mypassword.
# NOTE(review): the password is passed on the command line; clear this cell's
# output before sharing the notebook, and confirm the value needs no quoting.
!kubectl create secret docker-registry $secret4acr2infer --docker-server=$acr_name \
--docker-username=$myusername --docker-password=$mypassword

In [None]:
%%writefile deployment-k8s-template-gpu.yaml
#
# Template for the Kubernetes Deployment of the inference container.
#
# __REGISTRY_IMAGE_LOCATION and __REGISTRY_SECRET_NAME are placeholders that
# the notebook replaces when it renders this template to deployment_gpu.yaml.
# Deploy the rendered file (not this template) like so:
#
# $ kubectl create -f deployment_gpu.yaml
#
apiVersion: apps/v1
kind: Deployment
metadata:
  name: my-infer
  labels:
    app: my-infer
spec:
  replicas: 1
  selector:
    matchLabels:
      app: my-infer
  template:
    metadata:
      labels:
        app: my-infer
    spec:
      containers:
      - name: my-infer
        image: __REGISTRY_IMAGE_LOCATION
        ports:
        # Only 5001 is used; the container image also exposes 8883 and 8888.
        - containerPort: 5001
        - containerPort: 8883
        - containerPort: 8888
        resources:
          limits:
            # If you know your model's minimal requirements, you can control
            # the resource usage here. Some models may not work unless they
            # have enough.
            #
            # memory: "128Mi" #128 MB
            # cpu: "200m" # 200 millicpu (0.2 or 20% of the cpu)
            #
            # Request one GPU for the container.
            nvidia.com/gpu:  1
      # Secret used to pull the image from the private container registry.
      imagePullSecrets:
        - name: __REGISTRY_SECRET_NAME

Create a deployment_gpu.yaml file using the template and the settings earlier in the notebook.

In [None]:
# Render the deployment manifest: read the template, substitute the two
# placeholders with the values computed earlier in the notebook, and write the
# result to deployment_gpu.yaml.
# The template is read inside a `with` block so the file handle is closed, and
# with the same explicit encoding that is used for the output file.
with open(
    "deployment-k8s-template-gpu.yaml", "rt", encoding="utf-8"
) as template_file:
    contents = template_file.read()

contents = contents.replace("__REGISTRY_IMAGE_LOCATION", image_location)
contents = contents.replace("__REGISTRY_SECRET_NAME", secret4acr2infer)

with open("./deployment_gpu.yaml", "wt", encoding="utf-8") as output_file:
    output_file.write(contents)

## Deploying .yaml to your Kubernetes cluster
Then copy your `deployment_gpu.yaml` to the control box of your Kubernetes cluster (using `scp` or any FTP utility you like).

At that machine, you apply the deployment to your cluster, and expose the service like so:
    
```    
    $ kubectl create -f deployment_gpu.yaml
    deployment.apps/my-infer created
    
    $ kubectl expose deployment my-infer --type=LoadBalancer --name=my-service-infer
    service/my-service-infer exposed

    $ kubectl get services
    NAME               TYPE           CLUSTER-IP      EXTERNAL-IP   PORT(S)                                        AGE
    kubernetes         ClusterIP      10.152.183.1    <none>        443/TCP                                        7d19h
    my-service-infer   LoadBalancer   10.152.183.61   <your ip>     5001:30056/TCP,8883:31448/TCP,8888:31236/TCP   4h28m
```

In [None]:
# Create the Deployment from the manifest rendered earlier in this notebook.
!kubectl create -f deployment_gpu.yaml

In [None]:
# Expose the my-infer Deployment through a LoadBalancer service named my-service-infer.
!kubectl expose deployment my-infer --type=LoadBalancer --name=my-service-infer

In [None]:
# List the services; look up the EXTERNAL-IP of my-service-infer here.
!kubectl get services

# Test the web service
We test the web service by passing it the content of a test image.

**You need the `EXTERNAL-IP` value to change from `pending` to an actual ip address before you start using it.**

If you are using your Cluster-IP, you need to be on that cluster. SSH to your master node, for example, and run the following as a stand-alone script there.


In [None]:
import requests

# downloading labels for imagenet that resnet model was trained on
# Download the label file, one class name per line.
labels_response = requests.get(
    "https://raw.githubusercontent.com/Lasagne/Recipes/master/examples/resnet50/imagenet_classes.txt",
    timeout=30,
)
# Stop here on an HTTP error; otherwise the text of an error page would
# silently be used as the list of class names.
labels_response.raise_for_status()
classes_entries = labels_response.text.splitlines()

In [None]:
%%time
import requests

## Run it like so, for example:
# do_inference("snowleopardgaze.jpg", "http://51.141.178.47:5001/score")


def do_inference(myfilename, myscoring_uri):
    """Post an image file to the scoring endpoint and print the predicted label.

    Args:
        myfilename: Path of the file whose raw bytes are sent as the request body.
        myscoring_uri: Full URL of the scoring endpoint,
            e.g. "http://<your edge device ip address>:5001/score".

    Prints the payload size, the URI used, and then either the matching entry
    of ``classes_entries`` or a message describing why inference failed.
    """
    # Read the payload inside a `with` block so the file handle is closed.
    with open(myfilename, "rb") as image_file:
        test_sample = image_file.read()
    print("test_sample size is {}".format(len(test_sample)))

    try:
        # You can construct your own URI instead, passing only the ip in arguments:
        # "http://<replace with your edge device ip address>:5001/score"
        scoring_uri = myscoring_uri
        print("scoring_uri is {}".format(scoring_uri))

        # Set the content type
        # NOTE(review): the body is raw file bytes, not JSON; the header is
        # kept unchanged here -- confirm the service ignores it.
        headers = {"Content-Type": "application/json"}

        # Make the request; the timeout prevents hanging on an unreachable host.
        resp = requests.post(
            scoring_uri, test_sample, headers=headers, timeout=60
        )
        # Surface HTTP errors instead of trying to parse an error body.
        resp.raise_for_status()

        # The response text is a bracketed class id, e.g. "[123]".
        # NOTE(review): presumably the ids are offset by one relative to the
        # label file, hence the "- 1" -- confirm against the model.
        class_index = int(resp.text.strip("[]")) - 1
        print("Found a :: " + classes_entries[class_index])
    except (requests.RequestException, ValueError, IndexError) as e:
        # These are the failures this block can produce: a request/HTTP error,
        # a response that is not a single integer, or an id outside the labels.
        print("Inference failed: {}".format(e))