# ResNet Model Latency Testing

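This notebook measures the gRPC prediction latency of a ResNet model served with Seldon, comparing two request payload formats: the default Seldon `tensor` and the TensorFlow `tftensor`.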
 
<img src="dog.jpeg"/>

In [26]:
# Copy the Seldon prediction proto and the TensorFlow proto tree next to the
# notebook, then generate the Python message and gRPC stub modules.
# `mkdir -p` guarantees ./proto is a directory; without it `cp` would create a
# plain file named `proto` on a fresh checkout and protoc would then fail.
!mkdir -p proto
!cp ../../../proto/prediction.proto ./proto
!cp -vr ../../../proto/tensorflow/tensorflow .
!python -m grpc.tools.protoc -I./ --python_out=./ --grpc_python_out=./ ./proto/prediction.proto

'../../../proto/tensorflow/tensorflow' -> './tensorflow'
'../../../proto/tensorflow/tensorflow/core' -> './tensorflow/core'
'../../../proto/tensorflow/tensorflow/core/framework' -> './tensorflow/core/framework'
'../../../proto/tensorflow/tensorflow/core/framework/types.proto' -> './tensorflow/core/framework/types.proto'
'../../../proto/tensorflow/tensorflow/core/framework/resource_handle.proto' -> './tensorflow/core/framework/resource_handle.proto'
'../../../proto/tensorflow/tensorflow/core/framework/tensor_shape.proto' -> './tensorflow/core/framework/tensor_shape.proto'
'../../../proto/tensorflow/tensorflow/core/framework/tensor.proto' -> './tensorflow/core/framework/tensor.proto'


# Download model


In [27]:
# Create the local ./model directory and download the ResNet V1 50 SavedModel
# into it; this directory is later mounted into the serving container.
!mkdir -p model
!wget -O  model/saved_model.pb https://storage.googleapis.com/inference-eu/models_zoo/resnet_V1_50/saved_model/saved_model.pb

--2018-12-04 16:46:00--  https://storage.googleapis.com/inference-eu/models_zoo/resnet_V1_50/saved_model/saved_model.pb
Resolving storage.googleapis.com (storage.googleapis.com)... 216.58.212.112, 2a00:1450:4009:807::2010
Connecting to storage.googleapis.com (storage.googleapis.com)|216.58.212.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 102619858 (98M) [application/octet-stream]
Saving to: ‘model/saved_model.pb’


2018-12-04 16:46:04 (25.3 MB/s) - ‘model/saved_model.pb’ saved [102619858/102619858]



# Wrap inference

In [28]:
# Build the serving image `seldon-resnet2.4` from the current directory with
# s2i, using the Seldon Python 3.6 builder image and the `environment_grpc`
# environment file.
!s2i build -E environment_grpc . seldonio/seldon-core-s2i-python36:1.2.1-dev seldon-resnet2.4

---> Installing application source...
---> Installing dependencies ...
You are using pip version 18.0, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
Build completed successfully


In [36]:
# Start the built image as a detached container named "resnet": publish port
# 5000 (the port the gRPC client below connects to) and mount ./model at /model.
# `--rm` removes the container automatically once it is stopped.
!docker run --name "resnet" -d --rm -p 5000:5000 -v ${PWD}/model:/model seldon-resnet2.4

4803ea44a3f8070cc7f2b44d284780d3ceccdbb27a5faeba9f884b71c0102ff3


# Test

In [30]:
import base64
import datetime
import json
import pickle
import time

import cv2
import grpc
import numpy as np
import requests
import tensorflow as tf

from proto import prediction_pb2
from proto import prediction_pb2_grpc


In [31]:
def image_2_vector(input_file, w=224, h=224):
    """Decode an image file, resize it and return it as a flat pixel vector.

    Parameters
    ----------
    input_file : str
        Path of the encoded image (e.g. JPEG) to read.
    w, h : int, optional
        Target width and height in pixels. They default to 224, the size
        used by ``run``; previously they were read from undefined names.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of length ``w * h * 3``.

    Raises
    ------
    ValueError
        If the file content cannot be decoded as an image.
    """
    # cv2.imdecode expects the raw encoded bytes, so the file has to be read
    # as uint8; reading it as float32 reinterprets the bytes and breaks decoding.
    nparr = np.fromfile(input_file, dtype=np.uint8)
    print("nparr",nparr.dtype,nparr.shape)
    # IMREAD_COLOR always yields 3 channels, which the reshape below relies on
    # (IMREAD_ANYCOLOR may return a single-channel image for grayscale input).
    img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    if img is None:
        raise ValueError("could not decode image file: %s" % input_file)
    print("img",img.dtype,img.shape)
    print("Initial size",img.shape)
    image = cv2.resize(img, (w, h))
    print("image",image.dtype)
    print("Converted size",image.shape)

    vector = image.reshape((w * h * 3))
    print("vector shape",vector.shape, "vector type", vector.dtype )
    return vector

def image_2_bytes(input_file):
    """Return the complete content of ``input_file`` as a ``bytes`` object.

    The number of bytes read and the type of the result are printed as a
    diagnostic before returning.
    """
    handle = open(input_file, "rb")
    try:
        # Slurp the entire file in a single read.
        payload = handle.read()
    finally:
        handle.close()

    print("binary data size:", len(payload), type(payload))
    return payload

def _build_datadef(function, image):
    """Wrap ``image`` in a Seldon ``DefaultData`` of the requested payload type."""
    if function == "tensor":
        return prediction_pb2.DefaultData(
            # a list, not a bare string, so multi-character names stay intact
            names = ['x'],
            tensor = prediction_pb2.Tensor(
                shape = image.shape,
                values = image.ravel().tolist()
            )
        )
    print("Create tftensor")
    return prediction_pb2.DefaultData(
        names = ['x'],
        tftensor = tf.make_tensor_proto(image)
    )


def _print_latency_stats(processing_times):
    """Print every per-request latency (ms) followed by summary statistics."""
    print('processing time for all iterations')
    for x in processing_times:
        print(x,"ms")
    print('processing_statistics')
    print('average time:',round(np.average(processing_times),1), 'ms; average speed:', round(1000/np.average(processing_times),1),'fps')
    print('median time:',round(np.median(processing_times),1), 'ms; median speed:',round(1000/np.median(processing_times),1),'fps')
    print('max time:',round(np.max(processing_times),1), 'ms; max speed:',round(1000/np.max(processing_times),1),'fps')
    print('min time:',round(np.min(processing_times),1),'ms; min speed:',round(1000/np.min(processing_times),1),'fps')
    print('time percentile 90:',round(np.percentile(processing_times,90),1),'ms; speed percentile 90:',round(1000/np.percentile(processing_times,90),1),'fps')
    print('time percentile 50:',round(np.percentile(processing_times,50),1),'ms; speed percentile 50:',round(1000/np.percentile(processing_times,50),1),'fps')
    print('time standard deviation:',round(np.std(processing_times)))
    print('time variance:',round(np.var(processing_times)))


def run(function,image_path,iterations=1):
    """Send one image repeatedly to the model server and report latency.

    Parameters
    ----------
    function : str
        Payload format: ``"tensor"`` (Seldon tensor) or ``"tftensor"``
        (TensorFlow tensor proto).
    image_path : str
        Path of the image to send; it is resized to 224x224.
    iterations : int, optional
        Number of sequential ``Predict`` calls to time (at least 1).

    Raises
    ------
    ValueError
        If ``function`` is not a supported payload format, ``iterations`` is
        smaller than 1, or the image cannot be read.
    """
    # Validate up front: an unknown payload type used to surface as an
    # UnboundLocalError, and zero iterations only failed late inside np.max.
    if function not in ("tensor", "tftensor"):
        raise ValueError("function must be 'tensor' or 'tftensor', got %r" % (function,))
    if iterations < 1:
        raise ValueError("iterations must be at least 1, got %r" % (iterations,))

    w = 224
    h = 224

    # NOTE(gRPC Python Team): .close() is possible on a channel and should be
    # used in circumstances in which the with statement does not fit the needs
    # of the code.
    with grpc.insecure_channel('localhost:5000') as channel:
        stub = prediction_pb2_grpc.ModelStub(channel)
        print("seldon stub", stub)

        img = cv2.imread(image_path)
        # cv2.imread signals a missing or undecodable file by returning None
        if img is None:
            raise ValueError("could not read image: %s" % image_path)
        print("img type", type(img))
        print("img",img.shape)
        print("Initial size",img.shape)
        image = cv2.resize(img, (w, h))
        # cv2.resize takes (width, height) but returns rows x cols, i.e. (h, w, 3)
        image = image.reshape(1, h, w, 3)
        print("image",image.dtype)
        print("Converted size",image.shape)

        GRPC_request = prediction_pb2.SeldonMessage(
            data = _build_datadef(function, image)
        )

        durations_ms = []
        for _ in range(iterations):
            # perf_counter is monotonic, so clock adjustments cannot skew a sample
            started = time.perf_counter()
            stub.Predict(request=GRPC_request)
            elapsed_ms = (time.perf_counter() - started) * 1000
            durations_ms.append(int(elapsed_ms))

        _print_latency_stats(np.array(durations_ms, dtype=int))


In [38]:
# Benchmark 100 sequential Predict calls using the default Seldon `tensor` payload.
run("tensor","./dog.jpeg",iterations=100)

seldon stub <proto.prediction_pb2_grpc.ModelStub object at 0x7fdfd646db70>
img type <class 'numpy.ndarray'>
img (224, 224, 3)
Initial size (224, 224, 3)
image uint8
Converted size (1, 224, 224, 3)
processing time for all iterations
86 ms
78 ms
89 ms
84 ms
78 ms
77 ms
78 ms
77 ms
74 ms
81 ms
79 ms
75 ms
80 ms
79 ms
77 ms
75 ms
75 ms
76 ms
75 ms
76 ms
83 ms
77 ms
80 ms
79 ms
78 ms
77 ms
77 ms
76 ms
74 ms
77 ms
74 ms
79 ms
78 ms
75 ms
75 ms
78 ms
79 ms
76 ms
80 ms
75 ms
78 ms
76 ms
76 ms
81 ms
78 ms
78 ms
75 ms
77 ms
77 ms
74 ms
75 ms
81 ms
75 ms
75 ms
76 ms
78 ms
72 ms
79 ms
81 ms
80 ms
74 ms
82 ms
77 ms
77 ms
77 ms
78 ms
75 ms
77 ms
77 ms
77 ms
75 ms
79 ms
76 ms
80 ms
78 ms
75 ms
76 ms
76 ms
79 ms
77 ms
76 ms
76 ms
80 ms
77 ms
74 ms
79 ms
75 ms
73 ms
77 ms
76 ms
78 ms
78 ms
76 ms
77 ms
76 ms
76 ms
75 ms
74 ms
77 ms
82 ms
processing_statistics
average time: 77.3 ms; average speed: 12.9 fps
median time: 77.0 ms; median speed: 13.0 fps
max time: 89 ms; max speed: 11.2 fps
min time: 72 ms; 

In [33]:
# Benchmark 100 sequential Predict calls using the TensorFlow `tftensor` payload.
run("tftensor","./dog.jpeg",iterations=100)

seldon stub <proto.prediction_pb2_grpc.ModelStub object at 0x7fdfd646dac8>
img type <class 'numpy.ndarray'>
img (224, 224, 3)
Initial size (224, 224, 3)
image uint8
Converted size (1, 224, 224, 3)
Create tftensor
processing time for all iterations
75 ms
71 ms
78 ms
71 ms
73 ms
81 ms
81 ms
76 ms
73 ms
78 ms
74 ms
71 ms
70 ms
73 ms
79 ms
72 ms
77 ms
68 ms
77 ms
76 ms
71 ms
73 ms
71 ms
69 ms
72 ms
74 ms
70 ms
73 ms
71 ms
68 ms
72 ms
71 ms
69 ms
70 ms
69 ms
72 ms
70 ms
77 ms
66 ms
67 ms
77 ms
71 ms
72 ms
70 ms
72 ms
68 ms
71 ms
69 ms
71 ms
70 ms
71 ms
73 ms
70 ms
70 ms
75 ms
70 ms
72 ms
70 ms
74 ms
72 ms
72 ms
74 ms
71 ms
73 ms
73 ms
72 ms
75 ms
72 ms
69 ms
70 ms
71 ms
70 ms
71 ms
68 ms
69 ms
70 ms
73 ms
70 ms
69 ms
75 ms
69 ms
74 ms
71 ms
72 ms
69 ms
69 ms
73 ms
71 ms
70 ms
66 ms
75 ms
71 ms
69 ms
70 ms
70 ms
73 ms
72 ms
71 ms
70 ms
69 ms
processing_statistics
average time: 71.8 ms; average speed: 13.9 fps
median time: 71.0 ms; median speed: 14.1 fps
max time: 81 ms; max speed: 12.3 fps
m

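The statistics show that the `tftensor` payload, which is the only difference between the two runs, reduces latency: the average request time drops from 77.3 ms with the default Seldon tensor to 71.8 ms with `tftensor`.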

In [34]:
# Clean up: force-remove the "resnet" serving container started earlier.
!docker rm -f resnet

resnet
