# Tensorflow serving


In [1]:
try:
    %tensorflow_version 2.x
except:
    pass

## Setup

In [17]:
import os
import sys

import tensorflow as tf
print(tf.__version__)
from tensorflow.python.ops import lookup_ops

import mnist_input_data


FLAGS = {'training_iteration': 10000, 
         'model_version': 2, 
         'work_dir': '/tmp'
        }

tf.compat.v1.disable_eager_execution()

2.3.0


## Load data and Build Model for Train

In [18]:

print('Training model...')
mnist = mnist_input_data.read_data_sets(FLAGS['work_dir'], one_hot=True)

sess = tf.compat.v1.InteractiveSession()
serialized_tf_example = tf.compat.v1.placeholder(tf.string, name='tf_example')
feature_configs = {
  'x': tf.io.FixedLenFeature(shape=[784], dtype=tf.float32),
}
tf_example = tf.io.parse_example(serialized_tf_example, feature_configs)

x = tf.identity(tf_example['x'], name='x')  # use tf.identity() to assign name
y_ = tf.compat.v1.placeholder('float', shape=[None, 10])
w = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

Training model...
Extracting /tmp/train-images-idx3-ubyte.gz
Extracting /tmp/train-labels-idx1-ubyte.gz
Extracting /tmp/t10k-images-idx3-ubyte.gz
Extracting /tmp/t10k-labels-idx1-ubyte.gz




## Train and Evaluate model

In [19]:
sess.run(tf.compat.v1.global_variables_initializer())
y = tf.nn.softmax(tf.matmul(x, w) + b, name='y')
cross_entropy = -tf.math.reduce_sum(y_ * tf.math.log(y))
train_step = tf.compat.v1.train.GradientDescentOptimizer(0.01).minimize(
  cross_entropy)
values, indices = tf.nn.top_k(y, 10)
table = lookup_ops.index_to_string_table_from_tensor(tf.constant([str(i) for i in range(10)]))
prediction_classes = table.lookup(tf.dtypes.cast(indices, tf.int64))
for _ in range(FLAGS['training_iteration']):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.math.reduce_mean(tf.cast(correct_prediction, 'float'))
print('training accuracy %g' % sess.run(
  accuracy, feed_dict={
      x: mnist.test.images,
      y_: mnist.test.labels
  }))
print('Done training!')


training accuracy 0.9115
Done training!


## Export the model

In [20]:

export_path_base = 'mnist_model'
export_path = os.path.join(
    tf.compat.as_bytes(export_path_base),
    tf.compat.as_bytes(str(FLAGS['model_version'])))
print('Exporting trained model to', export_path)


builder = tf.compat.v1.saved_model.builder.SavedModelBuilder(export_path)

# Build the signature_def_map.
classification_inputs = tf.compat.v1.saved_model.utils.build_tensor_info(serialized_tf_example)

classification_outputs_classes = tf.compat.v1.saved_model.utils.build_tensor_info(prediction_classes)
classification_outputs_scores = tf.compat.v1.saved_model.utils.build_tensor_info(values)

classification_signature = (
    tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
        inputs={
            tf.compat.v1.saved_model.signature_constants.CLASSIFY_INPUTS:
                classification_inputs
        },
        outputs={
            tf.compat.v1.saved_model.signature_constants
            .CLASSIFY_OUTPUT_CLASSES:
                classification_outputs_classes,
            tf.compat.v1.saved_model.signature_constants
            .CLASSIFY_OUTPUT_SCORES:
                classification_outputs_scores
        },
        method_name=tf.compat.v1.saved_model.signature_constants
        .CLASSIFY_METHOD_NAME))

tensor_info_x = tf.compat.v1.saved_model.utils.build_tensor_info(x)
tensor_info_y = tf.compat.v1.saved_model.utils.build_tensor_info(y)

prediction_signature = (
    tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
        inputs={'images': tensor_info_x},
        outputs={'scores': tensor_info_y},
        method_name=tf.compat.v1.saved_model.signature_constants
        .PREDICT_METHOD_NAME))

builder.add_meta_graph_and_variables(
    sess, [tf.compat.v1.saved_model.tag_constants.SERVING],
    signature_def_map={
        'predict_images':
            prediction_signature,
        tf.compat.v1.saved_model.signature_constants
        .DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            classification_signature,
    },
    #main_op=tf.compat.v1.tables_initializer(),
    strip_default_attrs=True
    )

builder.save()

print('Done exporting!')


Exporting trained model to b'mnist_model/2'
INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to save.


INFO:tensorflow:No assets to write.


INFO:tensorflow:No assets to write.


INFO:tensorflow:SavedModel written to: mnist_model/2/saved_model.pb


INFO:tensorflow:SavedModel written to: mnist_model/2/saved_model.pb


Done exporting!


## Examine Your Saved Model

This is simplest way to see `MetaGraphDefs` and `SignatureDefs` in our SavedModel.

In [6]:
!saved_model_cli show --dir {export_path.decode()} --all

2020-08-15 09:43:44.276955: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2020-08-15 09:43:44.276996: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['predict_images']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['images'] tensor_info:
        dtype: DT_FLOAT
        shape: unknown_rank
        name: x:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['scores'] tensor_info:
        dtype: DT_FLOAT
        shape: unknown_rank
        name: y:0
  Method name is: tensorflow/serving/predict

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inp

## Run the TensorFlow Model Server

We will now launch the TensorFlow model server with a bash script. We will use the argument `--bg` to run the script in the background.

Our script will start running TensorFlow Serving and will load our model. Here are the parameters we will use:

* `rest_api_port`: The port that you'll use for requests.


* `model_name`: You'll use this in the URL of your requests.  It can be anything.


* `model_base_path`: This is the path to the directory where you've saved your model.

Also, because the variable that points to the directory containing the model is in Python, we need a way to tell the bash script where to find the model. To do this, we will write the value of the Python variable to an environment variable using the `os.environ` function.

In [7]:
%%bash --bg 
nohup tensorflow_model_server \
  --rest_api_port=8501 \
  --model_name=mnist \
  --model_base_path=/home/mohadese/workspace/tfserving/mnist_model >server.log 2>&1
  

In [10]:
!tail server.log 

2020-08-15 09:51:45.898871: E tensorflow_serving/sources/storage_path/file_system_storage_path_source.cc:362] FileSystemStoragePathSource encountered a filesystem access error: Could not find base path /home/mohadese/worksapce/tfserving/helloworld for servable helloworld
2020-08-15 09:51:46.898999: E tensorflow_serving/sources/storage_path/file_system_storage_path_source.cc:362] FileSystemStoragePathSource encountered a filesystem access error: Could not find base path /home/mohadese/worksapce/tfserving/helloworld for servable helloworld
2020-08-15 09:51:47.899030: E tensorflow_serving/sources/storage_path/file_system_storage_path_source.cc:362] FileSystemStoragePathSource encountered a filesystem access error: Could not find base path /home/mohadese/worksapce/tfserving/helloworld for servable helloworld
2020-08-15 09:51:48.899101: E tensorflow_serving/sources/storage_path/file_system_storage_path_source.cc:362] FileSystemStoragePathSource encountered a filesystem access error: Coul

## Inference With Rest

### Default url structure
```
http://{HOST}:{PORT}/v1/models/{MODEL_NAME}
```

### Model status API
```
GET http://host:port/v1/models/${MODEL_NAME}[/versions/${VERSION}|/labels/${LABEL}]
```

In [11]:
import requests
json_response = requests.get('http://localhost:8501/v1/models/mnist/metadata')
print(json_response.text)

{
"model_spec":{
 "name": "mnist",
 "signature_name": "",
 "version": "1"
}
,
"metadata": {"signature_def": {
 "signature_def": {
  "serving_default": {
   "inputs": {
    "inputs": {
     "dtype": "DT_STRING",
     "tensor_shape": {
      "dim": [],
      "unknown_rank": true
     },
     "name": "tf_example:0"
    }
   },
   "outputs": {
    "classes": {
     "dtype": "DT_STRING",
     "tensor_shape": {
      "dim": [],
      "unknown_rank": true
     },
     "name": "hash_table_Lookup/LookupTableFindV2:0"
    },
    "scores": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [],
      "unknown_rank": true
     },
     "name": "TopKV2:0"
    }
   },
   "method_name": "tensorflow/serving/classify"
  },
  "predict_images": {
   "inputs": {
    "images": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [],
      "unknown_rank": true
     },
     "name": "x:0"
    }
   },
   "outputs": {
    "scores": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "

In [12]:
import cv2
import numpy as np


img = cv2.imread('4.png', 0)

img = cv2.resize(img, (28, 28), None)
img = img / 255

images = img.reshape((1, img.shape[0] * img.shape[1]))


## Make Inference Request

Finally, we can make the inference request and get the inferences back. We'll send a predict request as a POST to our server's REST endpoint, and pass it our test data. We'll ask our server to give us the latest version of our model by not specifying a particular version. The response will be a JSON payload containing the predictions.

In [23]:
import json
import requests

data = json.dumps({"signature_name": "predict_images", "inputs": images.tolist()})
headers = {"content-type": "application/json"}

In [24]:
json_response = requests.post('http://localhost:8501/v1/models/mnist/versions/2:predict', data=data, headers=headers)
print(json_response.text)


{
    "outputs": [
        [
            1.86367333e-06,
            6.88217572e-09,
            1.22077836e-05,
            8.44899841e-06,
            0.997466087,
            0.00016840981,
            3.71115166e-05,
            3.25779365e-05,
            3.70204907e-05,
            0.00223625265
        ]
    ]
}


### Serving multiple version

simple config file

In [16]:
!cat config_file.txt


model_config_list: {

  config: {
    name:  "mnist",
    base_path:  "/home/mohadese/workspace/tfserving/mnist_model",
    model_platform: "tensorflow",
    model_version_policy: {
        specific {
            versions: 1
            versions: 2
                  }
    
    }
    version_labels {
        key: 'stable'
        value: 1
    }
 },
     
}


In [28]:
%%bash --bg 
nohup tensorflow_model_server \
  --rest_api_port=8501 \
  --allow_version_labels_for_unavailable_models=true \
  --model_config_file=config_file.txt >server.log 2>&1
  

In [30]:
!tail -n 50 server.log

2020-08-14 19:29:48.541979: I tensorflow_serving/model_servers/server_core.cc:464] Adding/updating models.
2020-08-14 19:29:48.542035: I tensorflow_serving/model_servers/server_core.cc:575]  (Re-)adding model: mnist
2020-08-14 19:29:48.642506: I tensorflow_serving/core/basic_manager.cc:739] Successfully reserved resources to load servable {name: mnist version: 2}
2020-08-14 19:29:48.642562: I tensorflow_serving/core/loader_harness.cc:66] Approving load for servable version {name: mnist version: 2}
2020-08-14 19:29:48.642583: I tensorflow_serving/core/loader_harness.cc:74] Loading servable version {name: mnist version: 2}
2020-08-14 19:29:48.642652: I external/org_tensorflow/tensorflow/cc/saved_model/reader.cc:31] Reading SavedModel from: /home/mohadese/workspace/tfserving/mnist_model/2
2020-08-14 19:29:48.645308: I external/org_tensorflow/tensorflow/cc/saved_model/reader.cc:54] Reading meta graph with tags { serve }
2020-08-14 19:29:48.645348: I external/org_tensorflow/tensorflo

### simple Rest url

In [22]:
import requests
json_response = requests.get('http://localhost:8501/v1/models/mnist/versions/2')
print(json_response.text)

{
 "model_version_status": [
  {
   "version": "2",
   "state": "AVAILABLE",
   "status": {
    "error_code": "OK",
    "error_message": ""
   }
  }
 ]
}



In [8]:
import json
import requests
import cv2
import base64
import numpy as np


image = cv2.imread('4.png', 0)

img = cv2.resize(image, (28, 28), None)
img = img / 255
images = img.reshape((img.shape[0] * img.shape[1]))
#image_content = base64.b64encode(open(image,'rb').read()).decode("utf-8")
image = images.astype(np.float32)
data = json.dumps({"signature_name": "serving_default", "examples": [
                {"x": image.tolist()}
                ]})

headers = {"content-type": "application/json"}

In [9]:
json_response = requests.post('http://localhost:8501/v1/models/mnist:classify', data=data, headers=headers)
print(json_response.text)


{
    "error": "Table not initialized.\n\t [[{{node hash_table_Lookup/LookupTableFindV2}}]]"
}


## Multiple Model

In [26]:
!saved_model_cli show --dir /home/mohadese/workspace/tfserving/vgg_model/1 --all

2020-08-15 10:05:06.886289: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2020-08-15 10:05:06.886322: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.

MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['__saved_model_init_op']:
  The given SavedModel SignatureDef contains the following input(s):
  The given SavedModel SignatureDef contains the following output(s):
    outputs['__saved_model_init_op'] tensor_info:
        dtype: DT_INVALID
        shape: unknown_rank
        name: NoOp
  Method name is: 

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_1'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 224, 224, 3)
        name: servin

In [71]:
%%bash --bg 
nohup tensorflow_model_server \
  --rest_api_port=8501 \
  --model_config_file=config_file.txt >server.log 2>&1
  

In [76]:
!tail server.log

2020-08-14 12:28:36.057927: I tensorflow_serving/core/loader_harness.cc:87] Successfully loaded servable version {name: mnist version: 1}
2020-08-14 12:28:38.143179: W external/org_tensorflow/tensorflow/core/framework/cpu_allocator_impl.cc:81] Allocation of 16384000 exceeds 10% of free system memory.
2020-08-14 12:28:55.426525: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:183] Running initialization op on SavedModel bundle at path: /home/mohadese/workspace/tfserving/vgg_model/1
2020-08-14 12:28:58.005363: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:364] SavedModel load for tags { serve }; Status: success: OK. Took 24028783 microseconds.
2020-08-14 12:28:58.258202: I tensorflow_serving/servables/tensorflow/saved_model_warmup.cc:105] No warmup data file found at /home/mohadese/workspace/tfserving/vgg_model/1/assets.extra/tf_serving_warmup_requests
2020-08-14 12:28:58.492738: I tensorflow_serving/core/loader_harness.cc:87] Successfully loaded servab

In [31]:
import cv2
import numpy as np
import json
import requests

    
img = cv2.imread('cat.jpg')
img = cv2.resize(img, (224, 224), None)
images = img.reshape((1, img.shape[0] ,img.shape[1], 3))

data = json.dumps({"signature_name": "serving_default", "inputs": images.tolist()})
headers = {"content-type": "application/json"}

In [27]:
json_response = requests.get('http://localhost:8501/v1/models/vgg/metadata')


In [28]:
print(json_response.text)

{
"model_spec":{
 "name": "vgg",
 "signature_name": "",
 "version": "1"
}
,
"metadata": {"signature_def": {
 "signature_def": {
  "serving_default": {
   "inputs": {
    "input_1": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [
       {
        "size": "-1",
        "name": ""
       },
       {
        "size": "224",
        "name": ""
       },
       {
        "size": "224",
        "name": ""
       },
       {
        "size": "3",
        "name": ""
       }
      ],
      "unknown_rank": false
     },
     "name": "serving_default_input_1:0"
    }
   },
   "outputs": {
    "predictions": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [
       {
        "size": "-1",
        "name": ""
       },
       {
        "size": "1000",
        "name": ""
       }
      ],
      "unknown_rank": false
     },
     "name": "StatefulPartitionedCall:0"
    }
   },
   "method_name": "tensorflow/serving/predict"
  },
  "__saved_model_init_op": {
   "inputs": {

In [32]:
json_response = requests.post('http://localhost:8501/v1/models/vgg:predict', data=data, headers=headers)
print(json_response.text)


{
    "outputs": [
        [
            1.59049208e-07,
            7.59523516e-07,
            4.64598344e-07,
            3.72798127e-06,
            1.52301504e-06,
            4.29905754e-07,
            6.8224054e-08,
            4.09062386e-06,
            4.02476689e-06,
            4.43094768e-06,
            3.44549858e-06,
            1.14350541e-06,
            7.22129244e-06,
            7.65616642e-06,
            1.23982238e-06,
            6.24071117e-06,
            2.42881833e-06,
            9.03172258e-06,
            2.15136629e-06,
            1.58441912e-06,
            7.47861213e-06,
            1.0679878e-06,
            4.08120769e-07,
            3.99873358e-07,
            2.80198265e-06,
            1.50720621e-06,
            4.79882556e-06,
            2.1566334e-06,
            1.84966223e-06,
            1.27574485e-05,
            2.81432835e-06,
            1.67622591e-06,
            8.59254556e-07,
            1.89304899e-06,
            9.15593773

### gRPC inference

To run Python client code without the need to build the API, you can install the tensorflow-serving-api PIP package using:

In [1]:
!pip3 install tensorflow-serving-api

Collecting tensorflow-serving-api
  Using cached https://files.pythonhosted.org/packages/72/0d/0f0822b418fd51795a4768f35bb17619af51312ca9f5762c80bea34bd7ae/tensorflow_serving_api-2.3.0-py2.py3-none-any.whl
Collecting protobuf>=3.6.0 (from tensorflow-serving-api)
  Using cached https://files.pythonhosted.org/packages/63/14/dc43f81adc543c435cfeb45dd4ac048a97a1eb621c2ccb68ab3d15118737/protobuf-3.12.4-cp36-cp36m-manylinux1_x86_64.whl
Collecting tensorflow<3,>=2.3 (from tensorflow-serving-api)
  Using cached https://files.pythonhosted.org/packages/97/ae/0b08f53498417914f2274cc3b5576d2b83179b0cbb209457d0fde0152174/tensorflow-2.3.0-cp36-cp36m-manylinux2010_x86_64.whl
Collecting grpcio>=1.0<2 (from tensorflow-serving-api)
[?25l  Downloading https://files.pythonhosted.org/packages/2f/48/5aae2b4f415cdab711ec9ec762f433b5d55184ec6e91afa3bc1092d1d0ab/grpcio-1.31.0-cp36-cp36m-manylinux2010_x86_64.whl (3.3MB)
[K    100% |████████████████████████████████| 3.3MB 634kB/s ta 0:00:011
Collecting keras-p

Collecting pyasn1<0.5.0,>=0.4.6 (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<3,>=2.3.0->tensorflow<3,>=2.3->tensorflow-serving-api)
  Using cached https://files.pythonhosted.org/packages/62/1e/a94a8d635fa3ce4cfc7f506003548d0a2447ae76fd5ca53932970fe3053f/pyasn1-0.4.8-py2.py3-none-any.whl
Collecting oauthlib>=3.0.0 (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<3,>=2.3.0->tensorflow<3,>=2.3->tensorflow-serving-api)
  Using cached https://files.pythonhosted.org/packages/05/57/ce2e7a8fa7c0afb54a0581b14a65b56e62b5759dbc98e80627142b8a3704/oauthlib-3.1.0-py2.py3-none-any.whl
[31mtensorboard 2.3.0 has requirement setuptools>=41.0.0, but you'll have setuptools 40.7.2 which is incompatible.[0m
[31mtensorflow 2.3.0 has requirement six>=1.12.0, but you'll have six 1.11.0 which is incompatible.[0m
Installing collected packages: protobuf, keras-preprocessing, termcolor, absl-py, tensorboard-plugin-wit, zipp, importlib-metadata, markdown, requests

In [33]:
import grpc
from grpc.beta import implementations

from tensorflow.core.framework import types_pb2
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc
from tensorflow_serving.apis import prediction_service_pb2

In [36]:
import requests
json_response = requests.get('http://localhost:9001/v1/models/vgg/metadata')
print(json_response.text)

{
"model_spec":{
 "name": "vgg",
 "signature_name": "",
 "version": "1"
}
,
"metadata": {"signature_def": {
 "signature_def": {
  "serving_default": {
   "inputs": {
    "input_1": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [
       {
        "size": "-1",
        "name": ""
       },
       {
        "size": "224",
        "name": ""
       },
       {
        "size": "224",
        "name": ""
       },
       {
        "size": "3",
        "name": ""
       }
      ],
      "unknown_rank": false
     },
     "name": "serving_default_input_1:0"
    }
   },
   "outputs": {
    "predictions": {
     "dtype": "DT_FLOAT",
     "tensor_shape": {
      "dim": [
       {
        "size": "-1",
        "name": ""
       },
       {
        "size": "1000",
        "name": ""
       }
      ],
      "unknown_rank": false
     },
     "name": "StatefulPartitionedCall:0"
    }
   },
   "method_name": "tensorflow/serving/predict"
  },
  "__saved_model_init_op": {
   "inputs": {

In [38]:
import cv2

img = cv2.imread('cat.jpeg')
img = cv2.resize(img, (224, 224))
img = img.astype(np.float32)
request = predict_pb2.PredictRequest()
request.model_spec.name = 'vgg'
request.model_spec.signature_name = 'serving_default'
request.inputs['input_1'].CopyFrom(tf.make_tensor_proto(np.array([img]), dtype=types_pb2.DT_FLOAT))


channel = grpc.insecure_channel('0.0.0.0:9000')
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    
result = stub.Predict(request, 10)  # 10 secs timeout
to_decode = np.expand_dims(result.outputs['predictions'].float_val, axis=0)
decoded = tf.keras.applications.imagenet_utils.decode_predictions(to_decode, 5)
print(decoded)

[[('n02124075', 'Egyptian_cat', 0.6222670078277588), ('n02123045', 'tabby', 0.3161282241344452), ('n02123159', 'tiger_cat', 0.06054197624325752), ('n02971356', 'carton', 0.00029843984520994127), ('n02127052', 'lynx', 0.00024288639542646706)]]


## Reference

https://www.tensorflow.org/tfx/guide/serving