In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from collections import OrderedDict

# Fetch Fashion-MNIST; load_data() yields one (images, labels) pair per split.
train_split, test_split = tf.keras.datasets.fashion_mnist.load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split

# Sanity-check the training arrays before any preprocessing.
for split_array in (train_images, train_labels):
    print(split_array.shape)

# Divide by 255.0 so the images become floating point
# (presumably mapping 8-bit pixel values onto [0, 1] -- confirm the raw dtype).
train_images, test_images = train_images / 255.0, test_images / 255.0

(60000, 28, 28)
(60000,)


In [None]:
def get_functional_model():
    """Build and compile the functional-API CNN classifier used in this notebook.

    Architecture: a (28, 28, 1) input, three Conv2D(relu) -> MaxPool2D(2, stride 2)
    stages with 32, 64 and 96 filters (kernel sizes 5, 5 and 3), a Flatten, and
    a 10-way softmax Dense layer named "dense_last".

    Returns:
        A tf.keras Model compiled with Adam (learning rate 0.001),
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    inputs = tf.keras.layers.Input(shape = (28, 28, 1))

    # (filters, kernel_size) of each convolution stage, in network order.
    conv_stages = ((32, 5), (64, 5), (96, 3))
    features = inputs
    for n_filters, kernel_size in conv_stages:
        features = tf.keras.layers.Conv2D(n_filters, kernel_size, use_bias = True, activation = 'relu')(features)
        features = tf.keras.layers.MaxPool2D(pool_size = 2, strides = 2)(features)

    features = tf.keras.layers.Flatten()(features)
    # The explicit layer name lets later cells find this layer via "dense" in its name.
    probabilities = tf.keras.layers.Dense(10, activation = 'softmax', name = "dense_last")(features)

    classifier = tf.keras.models.Model(inputs = inputs, outputs = probabilities)
    classifier.compile(
        optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy'])
    return classifier

# Instantiate the float (not yet quantized) model and print its layer table.
model = get_functional_model()
model.summary()

In [None]:
# Score the model on the test split before the training cell below is run.
baseline_scores = model.evaluate(test_images, test_labels)
loss, acc = baseline_scores
print('Test accuracy : ', "{:0.2%}".format(acc))

In [None]:
# Train the float model; 10% of the training data is held out for validation.
fit_options = dict(batch_size = 64, epochs = 15, validation_split = 0.1)
train_log = model.fit(train_images, train_labels, **fit_options)

In [None]:
# Re-score the float model on the test split after training.
loss, acc = model.evaluate(test_images, test_labels)
accuracy_text = "{:0.2%}".format(acc)
print('Test accuracy : ', accuracy_text)

In [None]:
# Destination for the trained float model (directory prefix + model name).
save_dir = "./logs/"
save_path = save_dir + "model_v3"
# Saving is disabled; uncomment to write the model to save_path.
# model.save(save_path)

In [None]:
# Optionally restore model_v3 from disk, then score whichever `model` is in memory.
save_dir = "./logs/"
save_path = save_dir + "model_v3"
# Loading is disabled; uncomment to replace `model` with the saved copy.
# model = tf.keras.models.load_model(save_path)
float_scores = model.evaluate(test_images, test_labels)
loss, acc = float_scores
print('Test accuracy : ', "{:0.2%}".format(acc))

In [None]:
# Derive the quantization-aware model from the float model via tfmot's quantize_model;
# the layer listing printed later in this notebook shows its layers carrying a "quant_" prefix.
q_aware_model = tfmot.quantization.keras.quantize_model(model)

In [None]:
# The quantization-aware model is compiled separately here, using the same loss
# and metric as the float model and the 'adam' optimizer given by name.
qat_compile_options = dict(
    optimizer = 'adam',
    loss = 'sparse_categorical_crossentropy',
    metrics = ['accuracy'])
q_aware_model.compile(**qat_compile_options)
q_aware_model.summary()

In [None]:
# Test-set score of the quantization-aware model before the fine-tuning cell below.
qat_scores = q_aware_model.evaluate(test_images, test_labels)
q_aware_test_loss, q_aware_test_acc = qat_scores
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

In [None]:
# Fine-tune the quantization-aware model for a single epoch
# (an earlier setting of epochs = 15 was left disabled in this cell).
qat_fit_options = dict(batch_size = 128, epochs = 1, validation_split = 0.1)
train_log = q_aware_model.fit(train_images, train_labels, **qat_fit_options)

In [None]:
# Test-set score of the quantization-aware model after fine-tuning.
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
qat_accuracy_text = "{:0.2%}".format(q_aware_test_acc)
print('Test accuracy : ', qat_accuracy_text)

In [None]:
# Save quantized model
# NOTE(review): a later cell loads from this same path, so the save below must
# have been run at least once for that load to succeed -- confirm before a fresh run.
save_dir = "./logs/"
save_path = save_dir + "model_q4_func"
# Saving is disabled; uncomment to write the quantization-aware model to save_path.
# q_aware_model.save(save_path)

In [2]:
# Restore the saved quantization-aware model and score it on the test split.
save_dir = "./logs/"
save_path = save_dir + "model_q4_func"

q_aware_model : tf.keras.Model
# load_model is called inside tfmot's quantize_scope(), as the saved model
# contains tfmot quantization wrapper layers.
with tfmot.quantization.keras.quantize_scope():
    q_aware_model = tf.keras.models.load_model(save_path)

restored_scores = q_aware_model.evaluate(test_images, test_labels)
q_aware_test_loss, q_aware_test_acc = restored_scores
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Test accuracy :  90.38%


In [3]:
# List every layer with its index, name and number of variables
# (the "params" label counts variable objects, not scalar parameters).
for i, listed_layer in enumerate(q_aware_model.layers):
    print("Layer : ", i, listed_layer.name, " - params : ", len(listed_layer.variables))

Layer :  0 input_2  - params :  0
Layer :  1 quantize_layer  - params :  3
Layer :  2 quant_conv2d_3  - params :  6
Layer :  3 quant_max_pooling2d_3  - params :  1
Layer :  4 quant_conv2d_4  - params :  6
Layer :  5 quant_max_pooling2d_4  - params :  1
Layer :  6 quant_conv2d_5  - params :  6
Layer :  7 quant_max_pooling2d_5  - params :  1
Layer :  8 quant_flatten_1  - params :  1
Layer :  9 quant_dense_last  - params :  7


In [115]:
bit_width = 8
# Largest positive level of a signed bit_width-bit integer (127 when bit_width is 8);
# used to rescale de-quantized weights onto an integer grid relative to max_var.
int_max = 2**(bit_width-1)-1

# Results keyed by weight name, in layer order:
#   quantized_and_dequantized / quantized         -> via each layer's own quantizer
#   new_quantized_and_dequantized / new_quantized -> via tf.quantization.fake_quant_* ops
quantized_and_dequantized = OrderedDict()
quantized = OrderedDict()
new_quantized_and_dequantized = OrderedDict()
new_quantized = OrderedDict()
# Parallel lists recording, per quantized weight, its layer index and its key.
layer_index_list = []
key_index_list = []

layer : tfmot.quantization.keras.QuantizeWrapperV2
quantizer : tfmot.quantization.keras.quantizers.Quantizer
weight : tf.Variable
for i, layer in enumerate(q_aware_model.layers):
    # Layers without a `_weight_vars` attribute contribute nothing.
    for weight, quantizer, quantizer_vars in getattr(layer, '_weight_vars', ()):
        min_var, max_var = quantizer_vars['min_var'], quantizer_vars['max_var']

        # Drop the last two characters of the variable name (presumably the ":0" suffix).
        key = weight.name[:-2]
        layer_index_list.append(i)
        key_index_list.append(key)

        # Path 1: run the quantizer stored alongside the weight in inference mode.
        dequantized = quantizer(inputs = weight, training = False, weights = quantizer_vars)
        quantized_and_dequantized[key] = dequantized
        quantized[key] = np.round(dequantized / max_var * int_max)

        # Path 2: call the raw TF fake-quant op: per-channel for conv layers,
        # per-tensor for dense layers; other layer types are skipped.
        if "conv2d" in layer.name:
            fake_quant_op = tf.quantization.fake_quant_with_min_max_vars_per_channel
        elif "dense" in layer.name:
            fake_quant_op = tf.quantization.fake_quant_with_min_max_vars
        else:
            continue
        new_dequantized = fake_quant_op(weight, min_var, max_var, bit_width, narrow_range = True, name = "New_quantized_" + str(i))
        new_quantized_and_dequantized[key] = new_dequantized
        new_quantized[key] = np.round(new_dequantized / max_var * int_max)

# Show one slice of each integer-scaled kernel: the [:, :, 0, 0] plane for
# 4-D conv kernels, the first column for the dense kernel.
# (quantized_and_dequantized, new_quantized_and_dequantized and new_quantized
# can be inspected with the same indexing.)
for key in quantized:
    print(key)
    integer_kernel = quantized[key]
    if "dense" in key:
        print(integer_kernel[:,0])
    else:
        print(integer_kernel[:,:,0,0])

conv2d_3/kernel
[[ 121.  127.   59.   68.    9.]
 [ -16.   53.   73.   37.  -24.]
 [  16.   23.    4.  -30.  -89.]
 [ -68.  -21.  -11.    0.   -7.]
 [-112. -108.  -36.   22.  -51.]]
conv2d_4/kernel
[[-70.  20.  17.   1.  29.]
 [-61.  -6. -15.  -3. -37.]
 [  4. -46.  13.  22.  -2.]
 [-48. -49.  16.  57.  16.]
 [-36.   9. -16. -27.  -3.]]
conv2d_5/kernel
[[ -1. -32. -22.]
 [ -8. -22.  51.]
 [-70.   2. -76.]]
dense_last/kernel
[-22.  22.  22. -28.  30. -28.  69. -45.  23.   4.  22. -18. -30. -57.
  -9. -66. -83. -16. -17. -54.  22.  32.  57. -49.   4. -12.  29.  -9.
  -8. -21.  33.   1. -47.  -8.   3.  18. -18.   8.   3.   3. -42.  -9.
  23. -23.  37.   2. -24.  23.  -3.   5.  34.   4.   3.  36. -31. -21.
  34. -41. -43.  40. -37.  29. -31. -19.   6. -24.  61.  14.  13.  34.
 -42.   6.  -9. -47.  28.  12. -28. -20.  26.  -5.  10. -21.  22. -15.
 -10.  36.  29.  48. -35.   0. -70. -22. -39.   1.  30.  13.]


In [116]:
def self_quantize_function(input, min_var, max_var, bits, narrow_range = False):
    """NumPy emulation of fake quantization (quantize, then de-quantize).

    Values are first saturated to the nudged range [min_adj, max_adj] and then
    snapped to the nearest multiple of the quantization step. Previously the
    nudged bounds were computed but never applied, so inputs outside
    [min_var, max_var] were rounded without being saturated.

    Args:
        input: array-like of values to quantize (the name shadows the builtin
            but is kept so existing keyword callers keep working).
        min_var: lower end of the quantization range; scalar, or an array
            broadcastable against `input` (e.g. one value per output channel).
        max_var: upper end of the quantization range, same form as `min_var`.
        bits: bit width of the quantized representation.
        narrow_range: when True the range is split into 2**bits - 2 steps
            instead of 2**bits - 1.

    Returns:
        The de-quantized values, broadcast to the shape of `input`: multiples
        of the step size lying within [min_adj, max_adj].

    Note:
        `max_var == min_var` gives a zero step size and is not handled.
    """
    step_count = 2**bits - 2 if narrow_range else 2**bits - 1
    scale = (max_var - min_var) / step_count

    # Nudge the range so that min_adj is an exact multiple of the step size;
    # max_adj then sits exactly step_count steps above it.
    min_adj = scale * np.round(min_var / scale)
    max_adj = max_var + min_adj - min_var
    # print("Scale : ", scale)

    # Saturate before rounding; np.asarray keeps this working for inputs that
    # only expose __array__ (such as eager TF tensors/variables).
    clamped = np.clip(np.asarray(input), np.asarray(min_adj), np.asarray(max_adj))
    return scale * np.round(clamped / scale)

# Re-quantize every recorded kernel with self_quantize_function and print the
# same slices as the previous cell, so the two outputs can be compared by eye.
for idx, layer_index in enumerate(layer_index_list):
    wrapped_layer = q_aware_model.layers[layer_index]
    weight_key = key_index_list[idx]

    # Non-trainable variables whose name contains this weight's key; the
    # entries with "min" / "max" in their name supply the quantization range.
    m_vars = {variable.name: variable for variable in wrapped_layer.non_trainable_variables if weight_key in variable.name}
    kernel = {variable.name: variable for variable in wrapped_layer.trainable_variables if "kernel" in variable.name}
    min_key = [name for name in m_vars if "min" in name][0]
    max_key = [name for name in m_vars if "max" in name][0]
    min_var, max_var = m_vars[min_key], m_vars[max_key]

    # The kernel is taken as the first trainable variable of the layer.
    kernel_index = 0
    self_quantized_and_dequantized = self_quantize_function(wrapped_layer.trainable_variables[kernel_index], min_var, max_var, bit_width, narrow_range = True)

    print(weight_key)
    # Rescale onto the signed integer grid relative to max_var.
    self_quantized = np.round(self_quantized_and_dequantized / max_var * (2**(bit_width - 1) - 1))
    if "dense" in weight_key:
        print(self_quantized[:,0])
    else:
        print(self_quantized[:,:,0,0])

conv2d_3/kernel
[[ 121.  127.   59.   68.    9.]
 [ -16.   53.   73.   37.  -24.]
 [  16.   23.    4.  -30.  -89.]
 [ -68.  -21.  -11.    0.   -7.]
 [-112. -108.  -36.   22.  -51.]]
conv2d_4/kernel
[[-70.  20.  17.   1.  29.]
 [-61.  -6. -15.  -3. -37.]
 [  4. -46.  13.  22.  -2.]
 [-48. -49.  16.  57.  16.]
 [-36.   9. -16. -27.  -3.]]
conv2d_5/kernel
[[ -1. -32. -22.]
 [ -8. -22.  51.]
 [-70.   2. -76.]]
dense_last/kernel
[-22.  22.  22. -28.  30. -28.  69. -45.  23.   4.  22. -18. -30. -57.
  -9. -66. -83. -16. -17. -54.  22.  32.  57. -49.   4. -12.  29.  -9.
  -8. -21.  33.   1. -47.  -8.   3.  18. -18.   8.   3.   3. -42.  -9.
  23. -23.  37.   2. -24.  23.  -3.   5.  34.   4.   3.  36. -31. -21.
  34. -41. -43.  40. -37.  29. -31. -19.   6. -24.  61.  14.  13.  34.
 -42.   6.  -9. -47.  28.  12. -28. -20.  26.  -5.  10. -21.  22. -15.
 -10.  36.  29.  48. -35.  -0. -70. -22. -39.   1.  30.  13.]


In [114]:
# Inspect the quantize config of one wrapped layer; index 2 is the layer printed
# as "quant_conv2d_3" in the layer listing above.
l = 2
print(q_aware_model.layers[l].quantize_config.get_config())
print(q_aware_model.layers[l].quantize_config.activation_quantizer)

{'weight_attrs': ['kernel'], 'activation_attrs': ['activation'], 'quantize_output': False}
<tensorflow_model_optimization.python.core.quantization.keras.quantizers.MovingAverageQuantizer object at 0x000002880459F3D0>


In [117]:
# Conversion to TF Lite model
# Build the converter directly from the in-memory quantization-aware Keras model.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
# Request the converter's default optimization set.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
    
# The converted model is kept in memory; a later cell wraps it in an interpreter.
quantized_tflite_model = converter.convert()



INFO:tensorflow:Assets written to: C:\Users\rosal\AppData\Local\Temp\tmpr6l4hsb7\assets


INFO:tensorflow:Assets written to: C:\Users\rosal\AppData\Local\Temp\tmpr6l4hsb7\assets


In [118]:
def evaluate_model(interpreter: "tf.lite.Interpreter", images = None, labels = None):
  """Measure top-1 accuracy of a TFLite interpreter, one sample at a time.

  Args:
    interpreter: TFLite interpreter to run. This function does not call
      allocate_tensors(); the caller must have done so already.
    images: iterable of samples. Each sample gets a leading batch axis and an
      axis inserted at position 3, then is cast to float32 (a 2-D sample of
      shape (H, W) becomes (1, H, W, 1)). Defaults to the notebook-level
      `test_images`.
    labels: ground-truth class ids aligned with `images`. Defaults to the
      notebook-level `test_labels`.

  Returns:
    Fraction of samples whose arg-max prediction equals its label.
  """
  # Fall back to the notebook globals so existing one-argument calls still work.
  if images is None:
    images = test_images
  if labels is None:
    labels = test_labels

  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on every sample.
  prediction_digits = []
  for i, test_image in enumerate(images):
    if i % 1000 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add the batch and channel axes, then cast once to
    # float32 to match the model's input data format.
    batched = np.expand_dims(np.expand_dims(test_image, axis = 0), axis = 3)
    interpreter.set_tensor(input_index, batched.astype(np.float32))

    # Run inference.
    interpreter.invoke()

    # Post-processing: get_tensor returns a copy of the output, so no view of
    # the interpreter's internal buffer is kept; drop the batch dimension and
    # take the class with the highest score.
    output = interpreter.get_tensor(output_index)
    prediction_digits.append(np.argmax(output[0]))

  print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == np.asarray(labels)).mean()
  return accuracy

In [119]:
# Wrap the in-memory TFLite model in an interpreter and allocate its tensors
# before any set_tensor()/invoke() call.
interpreter = tf.lite.Interpreter(model_content = quantized_tflite_model)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
for shape_label, tensor_details in (("Input Shape: ", input_details), ("Output Shape: ", output_details)):
    print(shape_label, tensor_details[0]['shape'])

# Accuracy of the converted model over the test set, one sample at a time.
test_accuracy = evaluate_model(interpreter)

# Side-by-side: TFLite accuracy against the Keras quantization-aware model's accuracy.
print('Quant TFLite test_accuracy:', test_accuracy)
print('Quant TF test accuracy:', q_aware_test_acc)

Input Shape:  [ 1 28 28  1]
Output Shape:  [ 1 10]
Evaluated on 0 results so far.
Evaluated on 1000 results so far.
Evaluated on 2000 results so far.
Evaluated on 3000 results so far.
Evaluated on 4000 results so far.
Evaluated on 5000 results so far.
Evaluated on 6000 results so far.
Evaluated on 7000 results so far.
Evaluated on 8000 results so far.
Evaluated on 9000 results so far.


Quant TFLite test_accuracy: 0.9038
Quant TF test accuracy: 0.9038000106811523


In [None]:
# Destination file for the converted TFLite model.
save_dir = "./logs/"
quant_file = 'quant_model_q4_func.tflite'
save_path = save_dir + quant_file
# Writing is disabled; uncomment to write the model bytes to save_path.
# with open(save_path, 'wb') as f:
#   f.write(quantized_tflite_model)

In [120]:
# Classify a single test sample with the TFLite interpreter.
ind_index = 10

# (28, 28) -> (1, 28, 28, 1): add the batch axis, then the channel axis, as float32.
test_image = np.expand_dims(test_images[ind_index], axis = 0)
test_image = np.expand_dims(test_image, axis = 3).astype(np.float32)

interpreter.set_tensor(input_details[0]['index'], test_image)
interpreter.invoke()
output_array = interpreter.get_tensor(output_details[0]['index'])

# Row 0 holds the class scores for the only sample in the batch.
class_scores = output_array[0]
digit = np.argmax(class_scores)
probability = max(class_scores)

# "Input index" is the ground-truth label; "Output index" is the predicted
# class followed by its score.
print("Input index : ", test_labels[ind_index])
print("Output index : ", digit, "{0:.2%}".format(probability))


Input index :  4
Output index :  4 99.61%
