<h2 align="center"> Deploy Models with TensorFlow Serving and Docker</h2>

### Load and Preprocess Data

In [None]:
#%%writefile -a train.py
import os
import time
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub

In [None]:
def load_dataset(file_path,num_samples):
  """Load review titles and one-hot sentiment labels from a CSV file.

  Reads at most `num_samples` rows, using the columns at positions 6 and 9
  as the rating and the title respectively.

  Args:
    file_path: path of the CSV file to read.
    num_samples: maximum number of data rows to read.

  Returns:
    (labels, text) where
      labels is an int array of shape (n, 3), one-hot over
        [negative, neutral, positive]: rating >= 4 is positive, rating == 3
        is neutral, anything else is negative;
      text is an object array of ASCII byte strings (non-ASCII characters
        are replaced by '?').
  """
  df=pd.read_csv(file_path,usecols=[6,9],nrows=num_samples)
  df.columns=['rating','title']

  text=[str(t).encode('ascii','replace') for t in df['title'].tolist()]
  text=np.array(text,dtype=object)

  # Encode against a fixed set of three classes so the label matrix always
  # has three columns in the same order, even when a class is absent from
  # the rows that were read (data-dependent dummy columns would shrink).
  class_ids=[2 if r>=4 else 1 if r==3 else 0 for r in df['rating'].tolist()]
  class_ids=np.array(class_ids,dtype=int)
  labels=np.eye(3,dtype=int)[class_ids]
  return labels,text

In [None]:
#%%writefile -a train.py



### Build the Classification Model using TF Hub

In [None]:
#%%writefile -a train.py

## https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1
## https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1
def get_model():
  """Build and compile the three-class review classifier.

  The network is a TF Hub text-embedding layer (string input, 128-dim
  output) followed by two ReLU dense layers (64 and 32 units) and a
  3-unit softmax output. It is compiled with categorical cross-entropy,
  the Adam optimizer and an accuracy metric; a summary is printed before
  the model is returned.
  """
  # Original author's note: token based text embedding trained on English
  # Google News 7B corpus.
  embedding = hub.KerasLayer(
      "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1",
      output_shape=[128],
      input_shape=[],
      dtype=tf.string,
  )

  classifier = tf.keras.Sequential([
      embedding,
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dense(3, activation='softmax'),
  ])
  classifier.compile(
      optimizer='Adam',
      loss='categorical_crossentropy',
      metrics=['accuracy'],
  )
  classifier.summary()
  return classifier


In [None]:
# Quick look at raw TF Hub embeddings. Note this loads the 50-dimensional
# nnlm-en-dim50 module, whereas get_model() uses the nnlm-en-dim128 URL.
embed = hub.load("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1")
# Embed three sample sentences in a single call.
embeddings = embed(["tounsi 7or mala la3b", "this is my project ","testing the embd"])
print(embeddings)


tf.Tensor(
[[ 4.45802003e-01 -1.66882738e-01 -1.25613630e-01  2.52814777e-02
   2.38615081e-01  3.64428103e-01 -1.17987983e-01  2.41861641e-01
  -1.53490186e-01 -9.13521275e-03  3.87853459e-02  3.58332172e-02
  -1.09570436e-01  9.61917341e-02  2.08671808e-01  2.35103816e-02
   3.35804045e-01 -4.76312637e-03 -2.15895921e-02 -7.05669448e-03
  -3.43162902e-02 -3.04552257e-01 -8.96364450e-03 -1.22085251e-02
  -8.14640373e-02 -3.58781815e-01  3.11557241e-02  2.37879902e-02
  -6.94732219e-02 -1.98291108e-01 -6.18666001e-02  2.02120781e-01
  -3.05522382e-02  1.56283751e-02  1.52608529e-01 -7.16896206e-02
  -1.68161184e-01 -1.63737293e-02 -1.99878365e-01  6.45219088e-02
   4.91093919e-02  2.15945631e-01  6.72324374e-02 -7.01343119e-02
   1.17598206e-01 -2.33126760e-01  5.04524931e-02  2.55567849e-01
   1.99451461e-01 -5.53790480e-03]
 [ 1.68188766e-01  1.62894666e-01  2.60049254e-01  9.11546946e-02
  -1.17276795e-01 -2.36244053e-02  1.14455312e-01  1.48555547e-01
  -2.24658251e-01 -9.29626450e

In [None]:
#%%writefile -a train.py

def train(EPOCHS=5,b_size=32,train_file='/content/train (1).csv',val_file='/content/test (1).csv',
          train_samples=500000,val_samples=50000):
  """Train the review classifier and return the fitted model.

  Args:
    EPOCHS: number of training epochs.
    b_size: mini-batch size.
    train_file: CSV file with the training reviews.
    val_file: CSV file with the validation reviews.
    train_samples: maximum number of rows read from `train_file`.
    val_samples: maximum number of rows read from `val_file`.

  Returns:
    The Keras model after `fit` has completed.

  Side effects:
    Writes a full-model checkpoint to `<cwd>/model_checkpoint` whenever the
    validation loss improves.
  """
  working_dir=os.getcwd()
  y_train,x_train=load_dataset(train_file,train_samples)
  y_val,x_val=load_dataset(val_file,val_samples)

  # The keyword names must be spelled exactly `verbose` and `save_best_only`;
  # save_best_only=True keeps only the checkpoint with the lowest val_loss
  # instead of saving after every epoch.
  checkpoint=tf.keras.callbacks.ModelCheckpoint(os.path.join(working_dir,'model_checkpoint'),
                                                monitor='val_loss',verbose=1,save_best_only=True,
                                                save_weights_only=False,mode='auto')

  model=get_model()
  model.fit(x_train,y_train,batch_size=b_size,epochs=EPOCHS,verbose=1,
            validation_data=(x_val,y_val),
            callbacks=[checkpoint])
  return model




### Train and Export Model as Protobuf

In [None]:
#%%writefile -a train.py

def export_model(model,base_path="amazon_review/"):
  """Save `model` as a TensorFlow SavedModel in a timestamped subdirectory.

  The subdirectory of `base_path` is named after the current Unix time in
  whole seconds (presumably so each export can act as a distinct model
  version for TensorFlow Serving; confirm against the serving setup).
  """
  version_dir=str(int(time.time()))
  export_dir=os.path.join(base_path,version_dir)
  tf.saved_model.save(model,export_dir)
# Entry point: train with the default settings, then export the result.
if __name__=='__main__':
  # `model` stays bound at module level; the prediction cells further down
  # in the notebook call model.predict on it.
  model=train()
  export_model(model)  



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 128)               124642688 
                                                                 
 dense (Dense)               (None, 64)                8256      
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 3)                 99        
                                                                 
Total params: 124,653,123
Trainable params: 10,435
Non-trainable params: 124,642,688
_________________________________________________________________
Epoch 1/5


INFO:tensorflow:Assets written to: /content/model_checkpoint/assets


Epoch 2/5


INFO:tensorflow:Assets written to: /content/model_checkpoint/assets


Epoch 3/5


INFO:tensorflow:Assets written to: /content/model_checkpoint/assets


Epoch 4/5


INFO:tensorflow:Assets written to: /content/model_checkpoint/assets


Epoch 5/5


INFO:tensorflow:Assets written to: /content/model_checkpoint/assets


INFO:tensorflow:Assets written to: amazon_review/1649202398/assets


INFO:tensorflow:Assets written to: amazon_review/1649202398/assets


### Test Model

In [None]:
# Smoke test with a negative-sounding review. The result has one row of three
# softmax probabilities; the columns presumably follow the one-hot order built
# in load_dataset (negative, neutral, positive). Verify.
# NOTE(review): the recorded output puts the highest probability on the last
# column for this input, which is worth investigating.
negative="bad product"
model.predict([negative])

array([[0.18521847, 0.08230737, 0.7324742 ]], dtype=float32)

In [None]:
# Smoke test with a positive-sounding review; returns one row of three
# softmax probabilities (column order presumably negative, neutral, positive;
# verify against load_dataset).
positive="love it"
model.predict([positive])

array([[0.10018238, 0.02673815, 0.8730795 ]], dtype=float32)

In [None]:
# Use the %pip magic so the package is installed into the running kernel's
# environment, pinned to the version this notebook was recorded with.
%pip install google-colab-shell==0.2


Collecting google-colab-shell
  Downloading google-colab-shell-0.2.tar.gz (4.2 kB)
Building wheels for collected packages: google-colab-shell
  Building wheel for google-colab-shell (setup.py) ... [?25l[?25hdone
  Created wheel for google-colab-shell: filename=google_colab_shell-0.2-py3-none-any.whl size=4124 sha256=0c08595064f7dff2aa987d58dcbbc74f5ec35f91aa6bff4e53bb8bb20c6a2a49
  Stored in directory: /root/.cache/pip/wheels/ee/62/db/d033316a546102d1da60b51a0cc4ddcf0b8c3cc6044469a47b
Successfully built google-colab-shell
Installing collected packages: google-colab-shell
Successfully installed google-colab-shell-0.2


In [None]:
from google_colab_shell import getshell


In [None]:
# NOTE(review): presumably opens an interactive shell inside the Colab
# runtime; confirm against the google-colab-shell documentation.
getshell()


In [None]:
# Recursively archive the exported model directory into /content/amazon.zip.
!zip -r /content/amazon.zip /content/amazon_review/


  adding: content/amazon_review/ (stored 0%)
  adding: content/amazon_review/1649202398/ (stored 0%)
  adding: content/amazon_review/1649202398/assets/ (stored 0%)
  adding: content/amazon_review/1649202398/assets/tokens.txt (deflated 43%)
  adding: content/amazon_review/1649202398/variables/ (stored 0%)
  adding: content/amazon_review/1649202398/variables/variables.data-00000-of-00001 (deflated 7%)
  adding: content/amazon_review/1649202398/variables/variables.index (deflated 62%)
  adding: content/amazon_review/1649202398/saved_model.pb (deflated 86%)


### Task : TensorFlow Serving with Docker

`docker pull tensorflow/serving`

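Run from the directory that contains `amazon_review/`. A bind mount needs an absolute source path, and the `--mount` value must be a single argument with no spaces:

```bash
docker run -p 8500:8500 \
           -p 8501:8501 \
           --mount type=bind,source="$(pwd)/amazon_review",target=/models/amazon_review \
           -e MODEL_NAME=amazon_review \
           -t tensorflow/serving
```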

#### Perform Model Prediction

##### Support for gRPC and REST

- TensorFlow Serving supports
    - gRPC Remote Procedure Calls (gRPC)
    - Representational State Transfer (REST)
- Consistent API structures
- Server supports both standards simultaneously
- Default ports:
    - gRPC: 8500
    - REST: 8501

In [None]:
%%writefile tf_serving_rest_client_v2.py
import json
import requests
import sys

def get_rest_url(model_name, host='192.168.1.103', port='8501', verb='predict', version=None):
    """Build the TensorFlow Serving REST URL for a model.

    Args:
        model_name: name the model is served under.
        host: host name or IP address of the serving container.
        port: REST port of the server.
        verb: API verb appended after the colon, e.g. 'predict'.
        version: optional model version; when given, the URL targets that
            specific version.

    Returns:
        'http://{host}:{port}/v1/models/{model_name}[/versions/{version}]:{verb}'
    """
    url = "http://{host}:{port}/v1/models/{model_name}".format(host=host, port=port, model_name=model_name)
    if version:
        # The version must be its own path segment, hence the leading slash.
        url += '/versions/{version}'.format(version=version)
    url += ':{verb}'.format(verb=verb)
    return url


def get_model_prediction(model_input, model_name='amazon_review', signature_name='serving_default', timeout=10.0):
    """Send one input to the model's REST predict endpoint and return the predictions.

    Proof of concept: HTTP and network errors are not handled here and
    propagate to the caller.

    Args:
        model_input: a single input instance (here: one review string).
        model_name: name the model is served under.
        signature_name: serving signature to invoke; sent in the request body.
        timeout: seconds to wait for the server before giving up, so an
            unreachable server cannot block the client indefinitely.

    Returns:
        The value of the 'predictions' key of the JSON response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.exceptions.RequestException: on connection errors or timeout.
    """
    url = get_rest_url(model_name)
    # Row format: inputs are listed under the "instances" key of the JSON
    # request. With a single named input, each instance is the input value itself.
    payload = {"signature_name": signature_name, "instances": [model_input]}

    rv = requests.post(url, data=json.dumps(payload), timeout=timeout)
    # Raises for 4xx/5xx responses and is a no-op otherwise.
    rv.raise_for_status()

    return rv.json()['predictions']

# Interactive loop: read a review from stdin, query the REST endpoint, print
# the prediction; ':q' exits.
if __name__ == '__main__':

    print("\nGenerate REST url ...")
    url = get_rest_url(model_name='amazon_review')
    print(url)

    while True:
        print("\nEnter an Amazon review [:q for Quit]")
        # Always bind `sentence`. On Python 2 use raw_input(), because input()
        # there would evaluate the typed text as code.
        sentence = raw_input() if sys.version_info[0] < 3 else input()
        if sentence == ':q':
            break
        model_input = sentence
        model_prediction = get_model_prediction(model_input)
        print("The model predicted ...")
        print(model_prediction)

Writing tf_serving_rest_client_v2.py


#### Predictions via gRPC

More sophisticated client-server connections

- Prediction data has to be converted to the Protobuf format
- Request types have designated types, e.g. float, int, bytes
- Payloads need to be converted to base64
- Connect to the server via gRPC stubs

#### gRPC vs REST: When to use which API standard

- REST is easy to implement and debug
- gRPC is more network efficient, with smaller payloads
- gRPC can provide much faster inferences!

In [None]:
%%writefile tf_serving_grpc_client.py
import sys
import grpc
from grpc.beta import implementations
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2, get_model_metadata_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc


def get_stub(host='127.0.0.1', port='8500'):
    """Create a PredictionService stub over an insecure gRPC channel.

    Args:
        host: host name or IP address of the TensorFlow Serving instance.
        port: gRPC port of the server.

    Returns:
        A `PredictionServiceStub` bound to '{host}:{port}'.
    """
    # Build the target from the arguments so callers can point the client
    # at a server other than the local default.
    channel = grpc.insecure_channel('{host}:{port}'.format(host=host, port=port))
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    return stub


def get_model_prediction(model_input, stub, model_name='amazon_review', signature_name='serving_default',
                         input_name='input_input', output_name='output'):
    """Run a gRPC Predict call and return the float values of one output tensor.

    Proof of concept: RPC errors and timeouts are not handled here and
    propagate to the caller.

    Args:
        model_input: value converted with `tf.make_tensor_proto` and sent as
            the input tensor (the script passes a one-element list of str).
        stub: PredictionService stub, e.g. from `get_stub()`.
        model_name: name the model is served under.
        signature_name: serving signature to invoke.
        input_name: key of the input tensor in the serving signature.
        output_name: key of the output tensor in the serving signature.

    Returns:
        The `float_val` field of the selected output tensor.
    """
    # NOTE(review): the defaults 'input_input' / 'output' only work if the
    # exported signature uses those keys. They depend on the Keras layer names
    # of the saved model, so confirm them with
    # `saved_model_cli show --dir <export_dir> --all` and pass the real keys.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    request.inputs[input_name].CopyFrom(tf.make_tensor_proto(model_input))
    response = stub.Predict.future(request, 5.0)  # 5 seconds
    return response.result().outputs[output_name].float_val


def get_model_version(model_name, stub):
    """Return the version number of the model currently served as `model_name`.

    Args:
        model_name: name the model is served under.
        stub: PredictionService stub, e.g. from `get_stub()`.

    Returns:
        `response.model_spec.version.value` from the GetModelMetadata reply.
    """
    request = get_model_metadata_pb2.GetModelMetadataRequest()
    # Query the model the caller asked about, not a fixed name.
    request.model_spec.name = model_name
    request.metadata_field.append("signature_def")
    response = stub.GetModelMetadata(request, 10)
    # signature of loaded model is available here: response.metadata['signature_def']
    return response.model_spec.version.value

# Interactive loop: read a review from stdin, query the gRPC endpoint, print
# the prediction; ':q' exits.
if __name__ == '__main__':
    print("\nCreate RPC connection ...")
    stub = get_stub()
    while True:
        print("\nEnter an Amazon review [:q for Quit]")
        # Always bind `sentence`; raw_input() only exists on Python 2.
        sentence = raw_input() if sys.version_info[0] < 3 else input()
        if sentence == ':q':
            break
        # The model input is a batch, so wrap the single review in a list.
        model_input = [sentence]
        model_prediction = get_model_prediction(model_input, stub)
        print("The model predicted ...")
        print(model_prediction)

Writing tf_serving_grpc_client.py
