In [11]:
import tensorflow as tf
import numpy as np

In [12]:
from typing import TypedDict


class DataType(TypedDict):
    """Typed description of the dataset mapping indexed throughout this notebook.

    Only ``train_x``, ``test_x`` and ``test_y`` are read by the cells in this
    notebook section; the meaning of the remaining keys is inferred from their
    names and should be confirmed against the code that produced the file.
    """

    # Inputs iterated row by row by ``representative_data_gen``.
    train_x: np.ndarray
    # presumably the targets paired with ``train_x`` -- TODO confirm
    train_y: np.ndarray
    # Inputs fed one row at a time to the TFLite interpreter.
    test_x: np.ndarray
    # Reference values the predictions are compared against (shape check + MSE).
    test_y: np.ndarray
    # presumably the full, unsplit inputs and targets -- TODO confirm
    X: np.ndarray
    Y: np.ndarray
# NOTE(review): ``np.load`` returns an ndarray (or an ``NpzFile`` for zipped
# archives), not a plain dict; the ``DataType`` annotation only documents the
# keys this notebook indexes (e.g. ``data['train_x']``) -- confirm how
# ../data.npy was saved.
data: DataType = np.load("../data.npy")

In [13]:
# Load the saved Keras model from ../model.h5 and print its layer summary.
model = tf.keras.models.load_model("../model.h5")
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_104 (Dense)           (None, 8)                 48        
                                                                 
 dense_105 (Dense)           (None, 16)                144       
                                                                 
 dense_106 (Dense)           (None, 64)                1088      
                                                                 
 dense_107 (Dense)           (None, 16)                1040      
                                                                 
 dense_108 (Dense)           (None, 8)                 136       
                                                                 
 dense_109 (Dense)           (None, 1)                 9         
                                                                 
Total params: 2,465
Trainable params: 2,465
Non-train

In [14]:
def representative_data_gen():
    """Yield calibration samples for the TFLite converter.

    Every row of ``data['train_x']`` is cast to ``float32`` and given a leading
    batch axis of size 1, so each sample has the same ``(1, *row_shape)`` layout
    that the inference loop later passes to ``interpreter.set_tensor``.

    Yields:
        list[np.ndarray]: a one-element list (one entry per model input)
        holding a single batched sample.
    """
    train_inputs = data['train_x'].astype("float32")
    for input_value in train_inputs:
        # Iterating the array drops the batch axis; restore it so the sample
        # matches the batched input signature of the model.
        yield [input_value[np.newaxis, ...]]

# Build a converter for the loaded Keras model and enable the default optimizations.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Calibration samples are drawn from the training inputs by representative_data_gen.
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# NOTE: inference_input_type / inference_output_type are NOT set here, so the
# converted model keeps float32 input and output tensors (see the dtypes
# reported by the interpreter in the next cell).

# Serialized model returned by the converter; later loaded via model_content
# and written to model.tflite.
tflite_model_quant = converter.convert()

INFO:tensorflow:Assets written to: C:\Users\JORGE~1.RUI\AppData\Local\Temp\tmpomjx35mx\assets




In [15]:
# Load the converted model into a TFLite interpreter directly from memory
# (model_content) and report the dtypes of its first input and output tensors.
interpreter = tf.lite.Interpreter(model_content=tflite_model_quant)
input_type = interpreter.get_input_details()[0]['dtype']
print('input: ', input_type)
output_type = interpreter.get_output_details()[0]['dtype']
print('output: ', output_type)

input:  <class 'numpy.float32'>
output:  <class 'numpy.float32'>


In [16]:
import pathlib
# Destination of the converted model, relative to the notebook's working directory.
tflite_model_quant_file = pathlib.Path("./model.tflite")

In [17]:
# Show the absolute location the relative path resolves to.
# NOTE(review): the rendered output exposes a local user directory -- consider
# clearing it before sharing the notebook.
tflite_model_quant_file.resolve()

WindowsPath('C:/Users/jorge.ruiz/Documents/Documentación/Quantization/Quantize-Inference/demo/bikes_vs_weather/tflite/model.tflite')

In [18]:

# Persist the converted model bytes to disk. The trailing semicolon keeps the
# byte count returned by write_bytes out of the cell output.
tflite_model_quant_file.write_bytes(tflite_model_quant);

In [19]:
# Run the TFLite interpreter over the test set, one sample per invocation.
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
interpreter.allocate_tensors()

predictions = []
for x in data['test_x'].astype("float32"):
    # Add the leading batch axis so a single row becomes a batch of one.
    # (The previous expand_dims(axis=1) assignment was dead code: it was
    # overwritten on the very next line and used the wrong axis.)
    input_data = np.expand_dims(x, axis=0)

    interpreter.set_tensor(input_details["index"], input_data)
    interpreter.invoke()
    # Drop the batch axis, then take the model's single output unit as a scalar.
    output = interpreter.get_tensor(output_details["index"])[0]
    predictions.append(output[0])

# One prediction per test row, collected into a 1-D array.
predictions = np.array(predictions)

In [20]:
# Guard the element-wise comparison below: with mismatched shapes the
# subtraction could broadcast instead of pairing each prediction with its target.
assert data["test_y"].shape == predictions.shape

In [26]:
# Mean squared error of the TFLite interpreter's predictions against the test targets.
np.square(predictions - data['test_y']).mean()


0.009385214104019192

In [22]:
from itcl_tflite2json import convert
# Export the saved .tflite file to JSON with the project's converter; the path
# is passed as a plain string and echoed for reference.
model_path = str(tflite_model_quant_file)
print(model_path)
convert(model_path, "tflite_int8.json")

model.tflite
Loading model from: model.tflite
Writing to: tflite_int8.json


In [41]:
# Sanity check: shape of the test targets that predictions are compared against.
print(data['test_y'].shape)

(106,)


In [44]:
from itcl_inference_engine.network.Network import SequentialNetwork

# Rebuild the network from the JSON export written by `convert` above.
net = SequentialNetwork.from_json_file("tflite_int8.json")



# NOTE(review): test_x is passed here without the float32 cast used for the
# TFLite interpreter loop, and squeeze() presumably collapses a unit axis so
# `res` lines up with test_y -- confirm against SequentialNetwork.infer.
res = net.infer(data['test_x']).squeeze()

# Mean squared error of the JSON-based engine against the test targets, for
# comparison with the TFLite interpreter's MSE computed earlier.
np.square(res - data['test_y']).mean()

0.009385214104019192