<h2 align="center"> Deploy Models with TensorFlow Serving and Docker</h2>

### Task 2: Load and Preprocess Data

In [1]:
# !pip install tensorflow_hub

In [2]:
#%%writefile -a train.py
import os
import time
import pandas as pd
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub

In [3]:
#Souce: https://www.kaggle.com/snap/amazon-fine-food-reviews/data
# !head -n 2 train.csv

In [4]:
#%%writefile -a train.py
def load_dataset(file_path,num_samples):
    df = pd.read_csv(file_path,usecols = [6,9],nrows=num_samples)
    
    df.columns = ['rating','title']
    
    text = df['title'].to_list()
    text = [str(t).encode('ascii','replace') for t in text]
    text = np.array(text,dtype=object)[:]
    
    labels = df['rating'].tolist()
    labels  = [1 if i>=4 else 0 if i==3 else -1 for i in labels]
    labels = np.array(pd.get_dummies(labels),dtype=int)[:]
    
    return labels,text

In [5]:
tmp_lables,tmp_text = load_dataset('train.csv',100)

In [6]:
tmp_text.shape

(100,)

In [7]:
tmp_lables[:10]

array([[0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1]])

### Task 3: Build the Classification Model using TF Hub

In [8]:
#%%writefile -a train.py

## https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1
## https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1

def get_model():
    hub_layer = hub.KerasLayer("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1", 
                               output_shape=[50],input_shape=[], dtype=tf.string,
                              trainable= False)
    model = tf.keras.Sequential()
    model.add(hub_layer)
    model.add(tf.keras.layers.Dense(16, activation='relu'))
    model.add(tf.keras.layers.Dense(3, activation='softmax',name="output"))
    model.compile(loss="categorical_crossentropy",
                 optimizer= "Adam",metrics=['accuracy'])

    model.summary()
    
    return model 

In [9]:
model = get_model()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 50)                48190600  
                                                                 
 dense (Dense)               (None, 16)                816       
                                                                 
 output (Dense)              (None, 3)                 51        
                                                                 
Total params: 48,191,467
Trainable params: 867
Non-trainable params: 48,190,600
_________________________________________________________________


In [10]:

embed = hub.load("https://tfhub.dev/google/tf2-preview/nnlm-en-dim50/1")
embeddings = embed(["cat is on the mat", "dog is in the fog"])


In [11]:
embeddings.shape

TensorShape([2, 50])

### Task 4: Define Training Procedure

In [12]:
#%%writefile -a train.py
def train(EPOCHS=2, BATCH_SIZE=32, TRAIN_FILE="train.csv",VAL_FILE='test.csv'):
    WORKING_DIR = os.getcwd()
    print("Loading training/validation data")
    y_train,x_train = load_dataset(TRAIN_FILE, num_samples=100000)
    y_val, x_val = load_dataset(VAL_FILE, num_samples=10000)
    print('Training the model ...')
    model = get_model()
    
    model.fit(x_train, y_train, batch_size=BATCH_SIZE,epochs=EPOCHS,verbose=1,
    validation_data=(x_val, y_val),
    callbacks=[tf.keras.callbacks.ModelCheckpoint(os.path.join(WORKING_DIR,'model_checkpoint'),
    monitor='val_loss',
    verbose = 1,
    save_best_model=True,
    save_weights_only=False,
    mode= 'auto' )])
    
    return model

In [15]:
# train()

### Task 5: Train and Export Model as Protobuf

In [16]:
#%%writefile -a train.py

def export_model(model,base_path="amazon_review/"):
    path = os.path.join(base_path,str(int(time.time())))
    tf.saved_model.save(model,path)
    
if __name__ == "__main__":      
        model = train()
        export_model(model)


Loading training/validation data
Training the model ...
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_2 (KerasLayer)  (None, 50)                48190600  
                                                                 
 dense_2 (Dense)             (None, 16)                816       
                                                                 
 output (Dense)              (None, 3)                 51        
                                                                 
Total params: 48,191,467
Trainable params: 867
Non-trainable params: 48,190,600
_________________________________________________________________
Epoch 1/2
Epoch 1: saving model to E:\MyFiles\WorkSpace\MLOps\TensorFlow Serving with Docker for Model Deployment\model_checkpoint
INFO:tensorflow:Assets written to: E:\MyFiles\WorkSpace\MLOps\TensorFlow Serving with Docker for Model Deployment\mod

INFO:tensorflow:Assets written to: E:\MyFiles\WorkSpace\MLOps\TensorFlow Serving with Docker for Model Deployment\model_checkpoint\assets


Epoch 2/2
Epoch 2: saving model to E:\MyFiles\WorkSpace\MLOps\TensorFlow Serving with Docker for Model Deployment\model_checkpoint
INFO:tensorflow:Assets written to: E:\MyFiles\WorkSpace\MLOps\TensorFlow Serving with Docker for Model Deployment\model_checkpoint\assets


INFO:tensorflow:Assets written to: E:\MyFiles\WorkSpace\MLOps\TensorFlow Serving with Docker for Model Deployment\model_checkpoint\assets


INFO:tensorflow:Assets written to: amazon_review/1648864296\assets


INFO:tensorflow:Assets written to: amazon_review/1648864296\assets


### Task 6: Test Model

#### Negative Review:

In [17]:
test_sentence = "horrible book, waste of time"
model.predict([test_sentence])

array([[0.43567666, 0.08142588, 0.4828974 ]], dtype=float32)

#### Positive Review:

In [18]:
test_sentence = "Awesome books"
model.predict([test_sentence])

array([[0.09453165, 0.03179249, 0.8736758 ]], dtype=float32)

### Task : TensorFlow Serving with Docker

The command to pull the tensorflow/serving image
```bash
docker pull tensorflow/serving
```

```bash
docker run -p 8500:8500 \
            -p 8501:8501 \
            --mount type=bind,\
            source=`pwd`/amazon_review/,\ # docker 会寻找最新的文件下的model
            target=/models/amazon_review \
            -e MODEL_NAME=amazon_review \
            -t tensorflow/serving
```

- 通过-p把docker的8501映射成Host的8501端口(而8500没有映射，因此8500的gRPC接口只能在Docker内部访问，而不能在Host以及其它机器上访问)

- 同时我们把Host的路径/path/to/my_model/映射到docker里的/models/my_model，

- 接着我们指定环境变量MODEL_NAME为my_model

- 最后-t启动tensorflow/serving这个镜像。

- 根据上面的描述，我们把model_name设置成立amazon_review,而MODEL_BASE_PATH是默认的/models，因此tensorflow_model_server命令会去/models/amazon_review下寻找模型
- 根据前面的–mount的bind，/models/amazon_review是Host机器的amazon_review/,也就是实际模型存放的位置。

- 如果我们想让外部可以通过gRPC访问，那么可以增加-p 8500:8500。当然我们不一定要使得Host的端口是8500，也可以是-p 12345:8500。

```bash
rhyme@ip-10-199-92-231:~/Desktop/Project$ docker run -p 8500:8500 -p 8501:8501 --mount type=bind,source=`pwd`/amazon_review/,target=/models/amazon_review -e MODEL_NAME=amazon_review -t tensorflow/serving
2022-04-02 01:45:02.081597: I tensorflow_serving/model_servers/server.cc:89] Building single TensorFlow model file config:  model_name: amazon_review model_base_path: /models/amazon_review
2022-04-02 01:45:02.081939: I tensorflow_serving/model_servers/server_core.cc:465] Adding/updating models.
2022-04-02 01:45:02.081973: I tensorflow_serving/model_servers/server_core.cc:591]  (Re-)adding model: amazon_review
2022-04-02 01:45:02.187726: I tensorflow_serving/core/basic_manager.cc:740] Successfully reserved resources to load servable {name: amazon_review version: 1648863436}
2022-04-02 01:45:02.187780: I tensorflow_serving/core/loader_harness.cc:66] Approving load for servable version {name: amazon_review version: 1648863436}
2022-04-02 01:45:02.187802: I tensorflow_serving/core/loader_harness.cc:74] Loading servable version {name: amazon_review version: 1648863436}
 ```

### Task : Setup a REST Client to Perform Model Predictions

#### Perform Model Prediction

##### Support for gRPC and REST

- TensorFlow Serving supports
    - Remote Procedure Protocal (gRPC)
    - Representational State Transfer (REST)
- Consistent API structures
- Server supports both standards simultaneously
- Default ports:
    - RPC: 8500
    - REST: 8501

#### Predictions via REST

- Standard HTTP POST requests
- Response is a JSON body with the prediction
- Request from the default or specific model

Default URI scheme:

`http://{HOST}:{PORT}/v1/models/{MODEL_NAME}`

Specific model versions:

`http://{HOST}:{PORT}/v1/models/{MODEL_NAME}[/versions/{MODEL_VERSION}]:predict`

In [21]:
%%writefile tf_serving_rest_client.py
import json
import requests
import sys

def get_rest_url(model_name, host='127.0.0.1', port='8501', verb='predict', version=None):
    """ generate the URL path"""
    url = "http://{host}:{port}/v1/models/{model_name}".format(host=host, port=port, model_name=model_name)
    if version:
        url += 'versions/{version}'.format(version=version)
    url += ':{verb}'.format(verb=verb)
    return url


def get_model_prediction(model_input, model_name='amazon_review', signature_name='serving_default'):
    """ no error handling at all, just poc"""

    url = get_rest_url(model_name)
    #In the row format, inputs are keyed to instances key in the JSON request.
    #When there is only one named input, specify the value of instances key to be the value of the input:
    data = {"instances": [model_input]}
    
    rv = requests.post(url, data=json.dumps(data))
    if rv.status_code != requests.codes.ok:
        rv.raise_for_status()
    
    return rv.json()['predictions']

if __name__ == '__main__':

    print("\nGenerate REST url ...")
    url = get_rest_url(model_name='amazon_review')
    print(url)
    
    while True:
        print("\nEnter an Amazon review [:q for Quit]")
        if sys.version_info[0] <= 3:
            sentence = input()
        if sentence == ':q':
            break
        model_input = sentence
        model_prediction = get_model_prediction(model_input)
        print("The model predicted ...")
        print(model_prediction)

Overwriting tf_serving_rest_client.py


```bash
rhyme@ip-10-199-92-231:~/Desktop/Project$ python3 ./tf_serving_rest_client.py

Generate REST url ...
http://127.0.0.1:8501/v1/models/amazon_review:predict

Enter an Amazon review [:q for Quit]
I'm really enjoying reading this book!
The model predicted ...
[[0.0729031041, 0.0303271879, 0.896769702]]

Enter an Amazon review [:q for Quit]
```



### Task : Setup a gRPC Client to Perform Model Predictions

Modified from [https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/mnist_client.py](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/example/mnist_client.py#L152)

#### Predictions via gRPC

More sophisticated client-server connections

- Prediction data has to be converted to the Protobuf format
- Request types have designated types, e.g. float, int, bytes
- Payloads need to be converted to base64
- Connect to the server via gRPC stubs

#### gRPC vs REST: When to use which API standard

- Rest is easy to implement and debug
- RPC is more network efficient, smaller payloads
- RPC can provide much faster inferences!

In [None]:
%%writefile tf_serving_grpc_client.py
import sys
import grpc
from grpc.beta import implementations
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2, get_model_metadata_pb2
from tensorflow_serving.apis import prediction_service_pb2_grpc

def get_stub(host='127.0.0.1', port='8500'):
    """创建stub连接器"""
    """
    A module acting as the interface for gRPC client.

    You can do everything in the client side via GRPC.Stub, 
    including connecting, sending/receiving steaming or non-steaming requests, canceling calls
    and so on.
    """
    channel = grpc.insecure_channel(f'{host}:{port}') 
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    return stub


def get_model_prediction(model_input, stub, model_name='amazon_review', signature_name='serving_default'):
    """ no error handling at all, just poc"""
    
    """创建grpc request 对象"""
    request = predict_pb2.PredictRequest()
    
    """编辑request属性"""
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    request.inputs['input_input'].CopyFrom(tf.make_tensor_proto(model_input))
    
    """将request 对象传给 stub 进行预测"""
    response = stub.Predict.future(request, 5.0)  # 5 seconds
    return response.result().outputs["output"].float_val


def get_model_version(model_name, stub):
    request = get_model_metadata_pb2.GetModelMetadataRequest()
    request.model_spec.name = 'amazon_review'
    request.metadata_field.append("signature_def")
    response = stub.GetModelMetadata(request, 10)
    # signature of loaded model is available here: response.metadata['signature_def']
    return response.model_spec.version.value

if __name__ == '__main__':
    print("\nCreate RPC connection ...")
    stub = get_stub()
    while True:
        print("\nEnter an Amazon review [:q for Quit]")
        if sys.version_info[0] <= 3:
            sentence = raw_input() if sys.version_info[0] < 3 else input()
        if sentence == ':q':
            break
        model_input = [sentence]
        model_prediction = get_model_prediction(model_input, stub)
        print("The model predicted ...")
        print(model_prediction)