In [1]:
import tensorflow as tf
from conversion_tf import GEMMDecisionTreeImplKeras, GEMMDecisionTreeImpl
from hummingbird.ml import convert
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
import numpy as np
from openvino.runtime import Core
import time
import torch

2023-03-31 16:33:38.553049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def representative_dataset():
    """Yield 100 random calibration batches.

    Each item is a one-element list holding a float32 array of shape (1, 8)
    whose values are drawn uniformly from [0, 8) with NumPy's global RNG.
    """
    remaining = 100
    while remaining > 0:
        sample = np.random.uniform(low=0., high=8., size=(1, 8))
        yield [sample.astype(np.float32)]
        remaining -= 1
 
# Make tf.function-wrapped code execute eagerly for the rest of the session.
tf.config.run_functions_eagerly(True)

# A single depth-1 tree. random_state is fixed so that the fitted tree (and
# therefore every prediction displayed further down) is the same after a
# kernel restart; the dataset below is already seeded the same way.
forest = RandomForestClassifier(n_estimators=1, max_depth=1, random_state=0)
X, y = make_classification(n_samples=1300, n_features=4,
                           n_informative=4, n_redundant=0,
                           random_state=0, shuffle=True,
                           n_classes=4)

# First 1000 samples for training, remaining 300 held out.
x_train, y_train = X[:1000], y[:1000]
x_test, y_test = X[1000:], y[1000:]

forest.fit(x_train, y_train)

# Fixed 1x8 sample tensors in three dtypes.
# NOTE: this rebinds `X` — from here on it is the int32 tensor below, not the
# feature matrix returned by make_classification (use x_train / x_test for that).
X = tf.constant([1, 2, 3, 4, 5, 6, 7, 8], shape=[1, 8], dtype=tf.int32)
X_float = tf.constant([1., 2., 3., 4., 5., 6., 7., 8.], shape=[1, 8])
X_8 = tf.constant([1, 2, 3, 4, 5, 6, 7, 8], shape=[1, 8], dtype=tf.int8)

2023-03-31 16:33:40.984921: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-31 16:33:40.986359: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [3]:
# Instantiate GEMMDecisionTreeImpl and GEMMDecisionTreeImplKeras (in that
# order) from the same fitted `forest`.
model_gemm, keras_model_gemm = (
    GEMMDecisionTreeImpl(forest),
    GEMMDecisionTreeImplKeras(forest),
)

In [4]:
# Single-sample inputs reused by the benchmark cells:
#   x   -> float32, shape (1, 4)
#   x_8 -> int8,    shape (1, 2)
x = np.asarray([[1.0, 2.0, 3.0, 4.0]], dtype="float32")
x_8 = np.asarray([[1, 2]], dtype="int8")

### NORMAL GEMM

In [5]:
# Time a single call of model_gemm on x and store the elapsed time in milliseconds.
# NOTE(review): this is one un-warmed call, so the figure may include one-off
# first-call costs — confirm that is the intended measurement.
start_gemm = time.perf_counter()
res_gemm = model_gemm(x)
time_gemm = (time.perf_counter() - start_gemm) * 1000  # seconds -> ms

### KERAS

In [5]:
# Time a single call of keras_model_gemm on x and store the elapsed time in milliseconds.
# NOTE(review): this is one un-warmed call, so the figure may include one-off
# first-call costs — confirm that is the intended measurement.
start_keras = time.perf_counter()
res_keras = keras_model_gemm(x)
time_keras = (time.perf_counter() - start_keras) * 1000  # seconds -> ms

### OPENVINO

In [29]:
# Location of the OpenVINO IR description (.xml) to benchmark.
model_xml = "../../saved_models/ncs/test/first/first_ir.xml"

# Load model
ie = Core()
model = ie.read_model(model=model_xml)

# Neural Compute Stick
# compiled_model = ie.compile_model(model=model, device_name="MYRIAD")
compiled_model = ie.compile_model(model=model, device_name="CPU")

# Only the compiled model is needed from here on.
del model

input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)

# Time one inference on the shared float32 sample `x` (same values as before:
# [[1, 2, 3, 4]]). Previously a fresh int64 array was built inside the timed
# call, so this measurement included array construction and an implicit dtype
# conversion that the other back-ends' timings do not pay.
start_vino = time.perf_counter()
res_vino = compiled_model(x)[output_layer]
time_vino = (time.perf_counter() - start_vino) * 1000  # seconds -> ms

### HUMMINGBIRD

In [6]:
# Convert the fitted forest with Hummingbird to a torch backend, requesting
# the "gemm" tree implementation, then keep a handle on the first operator
# of the converted model.
container = convert(
    forest,
    'torch',
    extra_config=dict(tree_implementation="gemm"),
)
hb_model = container.model._operators[0]

In [8]:
# Time a single container.predict call on x and store the elapsed time in milliseconds.
# NOTE(review): this is one un-warmed call, so the figure may include one-off
# first-call costs — confirm that is the intended measurement.
start_hb = time.perf_counter()
res_hb = container.predict(x)
time_hb = (time.perf_counter() - start_hb) * 1000  # seconds -> ms

### TF LITE

In [31]:
# Load the TFLite model and allocate its tensors.
interpreter = tf.lite.Interpreter(
    model_path='../../saved_models/edgetpu/random_forest/gemm/float32/final_eval/benchmarked/model_1_4_2_1.tflite'
)
interpreter.allocate_tensors()

# Metadata of the first input / output tensor. Deliberately not named
# `input` / `output`: binding `input` at notebook level would shadow the
# builtin input() for every later cell in the kernel session.
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

# Feed the int8 sample before starting the clock.
interpreter.set_tensor(input_details['index'], x_8)

# Only invoke() is timed; setting the input and reading the output are
# outside the measured region.
start_lite = time.perf_counter()
interpreter.invoke()
time_lite = (time.perf_counter() - start_lite) * 1000  # seconds -> ms
pred_lite = interpreter.get_tensor(output_details['index'])

In [11]:
# Display the OpenVINO result stored in res_vino.
res_vino

array([[[0.22851562],
        [0.04525757],
        [0.23754883],
        [0.48876953]]], dtype=float32)

In [7]:
# Run the Hummingbird operator on x and display what it returns.
# The module is called directly rather than via .forward(), so that any
# registered nn.Module hooks run as well.
hb_model(torch.tensor(x))

(tensor([0]), tensor([[0.5129, 0.1392, 0.0825, 0.2655]], grad_fn=<TBackward0>))

In [8]:
# Display the Keras model result stored in res_keras.
res_keras

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0.5128866 , 0.13917525, 0.08247422, 0.26546392]], dtype=float32)>

### RESULTS

In [33]:
# One line per back-end: label, tab, elapsed milliseconds with two decimals
# (the leading space in the format spec reserves room for a sign).
report_lines = [
    f'MODEL_GEMM: \t{time_gemm : .2f}ms',
    f'MODEL_KERAS:\t{time_keras : .2f}ms',
    f'OPENVINO:   \t{time_vino : .2f}ms',
    f'HUMMINGBIRD:\t{time_hb : .2f}ms',
    f'TFLITE:     \t{time_lite : .2f}ms',
]
print('\n'.join(report_lines))

MODEL_GEMM: 	 2.10ms
MODEL_KERAS:	 2.79ms
OPENVINO:   	 0.24ms
HUMMINGBIRD:	 1.19ms
TFLITE:     	 0.06ms
