In [3]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_model_optimization as tfmot

# Fetch the Fashion-MNIST train/test splits through the Keras dataset helper.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.fashion_mnist.load_data()
print(train_images.shape)
print(train_labels.shape)
# Divide by 255.0 so both image arrays become floats.
# Presumably the raw pixels are 0-255, giving a [0, 1] range — TODO confirm.
train_images = train_images / 255.0
test_images = test_images / 255.0

(60000, 28, 28)
(60000,)


In [495]:
# Float baseline network: three Conv2D -> ReLU -> MaxPool2D stages, then
# Flatten and a 10-unit softmax Dense layer. Layers are appended one by one
# in the same order the constructor list would give.
model = tf.keras.Sequential()

# Stage 1: 32 filters, 5x5 kernel, on 28x28x1 inputs.
model.add(tf.keras.layers.Conv2D(32, 5, input_shape = (28, 28, 1)))
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.MaxPool2D(pool_size = 2, strides = 2))

# Stage 2: 64 filters, 5x5 kernel.
model.add(tf.keras.layers.Conv2D(64, 5))
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.MaxPool2D(pool_size = 2, strides = 2))

# Stage 3: 96 filters, 3x3 kernel.
model.add(tf.keras.layers.Conv2D(96, 3))
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.MaxPool2D(pool_size = 2, strides = 2))

# Classifier head.
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))

# Integer labels are used directly, hence the sparse cross-entropy loss.
model.compile(
    loss = 'sparse_categorical_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 24, 24, 32)        832       
                                                                 
 re_lu (ReLU)                (None, 24, 24, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 8, 8, 64)          51264     
                                                                 
 re_lu_1 (ReLU)              (None, 8, 8, 64)          0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 4, 4, 64)         0         
 2D)                                                    

In [496]:
# For each layer print its position, name and how many weight arrays
# get_weights() returns.
for idx, lyr in enumerate(model.layers):
    print("Layer : ", idx, lyr.name, len(lyr.get_weights()))


Layer :  0 conv2d 2
Layer :  1 re_lu 0
Layer :  2 max_pooling2d 0
Layer :  3 conv2d_1 2
Layer :  4 re_lu_1 0
Layer :  5 max_pooling2d_1 0
Layer :  6 conv2d_2 2
Layer :  7 re_lu_2 0
Layer :  8 max_pooling2d_2 0
Layer :  9 flatten 0
Layer :  10 dense 2


In [497]:
# Baseline check: score the model on the test split ahead of the fit cell that follows,
# then print the accuracy as a percentage with two decimals.
loss, acc = model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(acc))

Test accuracy :  11.36%


In [498]:
# Train the float model for 15 epochs with batches of 64, holding out 10% of
# the training data for validation. The object returned by fit() is kept in train_log.
train_log = model.fit(train_images, train_labels,
    batch_size = 64,
    epochs = 15,
    validation_split = 0.1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [499]:
# Score the model on the test split again, this time after the fit cell above.
loss, acc = model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(acc))

Test accuracy :  90.52%


In [151]:
# Target location for the float model (plain string concatenation of directory and name).
save_dir = "./logs/"
save_path = save_dir + "model_v2"
# NOTE(review): the save call is commented out, yet the recorded output of this cell
# shows assets being written — it looks like the output comes from an earlier run
# where the line was active. On a fresh run nothing is written here.
# model.save(save_path)



INFO:tensorflow:Assets written to: ./logs/model_v2\assets


INFO:tensorflow:Assets written to: ./logs/model_v2\assets


In [520]:
# Load model_v2
save_dir = "./logs/"
save_path = save_dir + "model_v2"
# NOTE(review): the load is commented out, so the evaluation below scores whatever
# `model` currently lives in the kernel, not a model read back from save_path.
# model = tf.keras.models.load_model(save_path)
loss, acc = model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(acc))

Test accuracy :  90.80%


In [521]:
# Produce a quantization-aware version of the trained float model. Per the summary
# printed by the next cell, the result contains a QuantizeLayer followed by
# QuantizeWrapperV2-wrapped copies of the original layers.
q_aware_model = tfmot.quantization.keras.quantize_model(model)

In [522]:
# Compile the quantization-aware model with the same optimizer, loss and metric
# that were used for the float model, then print its layer summary.
q_aware_model.compile(optimizer = 'adam', 
    loss = 'sparse_categorical_crossentropy', 
    metrics = ['accuracy'])
q_aware_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 quantize_layer_4 (QuantizeL  (None, 28, 28, 1)        3         
 ayer)                                                           
                                                                 
 quant_conv2d_9 (QuantizeWra  (None, 24, 24, 32)       833       
 pperV2)                                                         
                                                                 
 quant_re_lu_9 (QuantizeWrap  (None, 24, 24, 32)       3         
 perV2)                                                          
                                                                 
 quant_max_pooling2d_9 (Quan  (None, 12, 12, 32)       1         
 tizeWrapperV2)                                                  
                                                                 
 quant_conv2d_10 (QuantizeWr  (None, 8, 8, 64)        

In [None]:
# Score the quantization-aware model on the test split; this cell sits before the
# fine-tuning fit cell. NOTE(review): the cell marker shows it was not executed in
# the recorded session.
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

In [523]:
# Fine-tune the quantization-aware model: batches of 128, 10% validation split.
# A 15-epoch setting is left commented out in favour of a single epoch.
# NOTE(review): this rebinds train_log, replacing the result of the float-model fit.
train_log = q_aware_model.fit(train_images, train_labels,
    batch_size = 128,
    # epochs = 15,
    epochs = 1,
    validation_split = 0.1)



In [524]:
# Score the quantization-aware model on the test split after the fine-tuning cell.
# q_aware_test_acc is read again later when comparing against the TFLite model.
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

Test accuracy :  90.82%


In [525]:
# Save quantized model
save_dir = "./logs/"
save_path = save_dir + "model_q3"
# NOTE(review): the save call is commented out, although the recorded output shows
# assets being written — presumably from an earlier run with the line enabled.
# q_aware_model.save(save_path)



INFO:tensorflow:Assets written to: ./logs/model_q3\assets


INFO:tensorflow:Assets written to: ./logs/model_q3\assets


In [567]:
# Load model 
save_dir = "./logs/"
save_path = save_dir + "model_q3"
# The commented-out lines would reload the saved model inside tfmot's quantize_scope().
# NOTE(review): with them disabled, the evaluation below uses the q_aware_model
# already present in the kernel.
# with tfmot.quantization.keras.quantize_scope():
#     q_aware_model = tf.keras.models.load_model(save_path)
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

Test accuracy :  90.82%


In [516]:
# For each layer of the quantization-aware model print its position, name and
# the number of entries in its `variables` list.
for idx, lyr in enumerate(q_aware_model.layers):
    print("Layer : ", idx, lyr.name," - params : ", len(lyr.variables))

Layer :  0 quantize_layer_3  - params :  3
Layer :  1 quant_conv2d  - params :  3
Layer :  2 quant_re_lu  - params :  3
Layer :  3 quant_max_pooling2d  - params :  1
Layer :  4 quant_conv2d_1  - params :  3
Layer :  5 quant_re_lu_1  - params :  3
Layer :  6 quant_max_pooling2d_1  - params :  1
Layer :  7 quant_conv2d_2  - params :  3
Layer :  8 quant_re_lu_2  - params :  3
Layer :  9 quant_max_pooling2d_2  - params :  1
Layer :  10 quant_flatten  - params :  1
Layer :  11 quant_dense  - params :  7


In [568]:
# For every layer print its position and name, followed by a list of
# (variable name, variable shape) pairs for all of its variables.
for position, wrapped in enumerate(q_aware_model.layers):
    print("Layer", position, wrapped.name)
    print([(var.name, var.shape) for var in wrapped.variables])

Type :  <class 'tensorflow_model_optimization.python.core.quantization.keras.quantize_layer.QuantizeLayer'>
Layer 0 quantize_layer_4
[('quantize_layer_4/quantize_layer_4_min:0', TensorShape([])), ('quantize_layer_4/quantize_layer_4_max:0', TensorShape([])), ('quantize_layer_4/optimizer_step:0', TensorShape([]))]
Type :  <class 'tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper.QuantizeWrapperV2'>
Layer 1 quant_conv2d_9
[('conv2d_9/kernel:0', TensorShape([5, 5, 1, 32])), ('conv2d_9/bias:0', TensorShape([32])), ('quant_conv2d_9/optimizer_step:0', TensorShape([]))]
Type :  <class 'tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper.QuantizeWrapperV2'>
Layer 2 quant_re_lu_9
[('quant_re_lu_9/optimizer_step:0', TensorShape([])), ('quant_re_lu_9/output_min:0', TensorShape([])), ('quant_re_lu_9/output_max:0', TensorShape([]))]
Type :  <class 'tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper.QuantizeWrapperV2'>
L

In [513]:
# Substrings that flag an instance attribute as interesting for this inspection.
needles = ("quant", "kern", "weight", "var", "bias")

# Print (layer index, attribute name) for every instance attribute whose name
# contains at least one of the substrings above.
for layer_no, lyr in enumerate(q_aware_model.layers):
    for attr_name in vars(lyr):
        if any(needle in attr_name for needle in needles):
            print(layer_no, attr_name)

0 _trainable_weights
0 _non_trainable_weights
0 _initial_weights
0 _captured_weight_regularizer
0 quantizer
0 quantizer_vars
1 _trainable_weights
1 _non_trainable_weights
1 _initial_weights
1 _captured_weight_regularizer
1 quantize_config
1 _weight_vars
1 _quantize_activations
1 _output_quantizers
2 _trainable_weights
2 _non_trainable_weights
2 _initial_weights
2 _captured_weight_regularizer
2 quantize_config
2 _weight_vars
2 _quantize_activations
2 _output_quantizers
2 _output_quantizer_vars
3 _trainable_weights
3 _non_trainable_weights
3 _initial_weights
3 _captured_weight_regularizer
3 quantize_config
3 _weight_vars
3 _quantize_activations
3 _output_quantizers
4 _trainable_weights
4 _non_trainable_weights
4 _initial_weights
4 _captured_weight_regularizer
4 quantize_config
4 _weight_vars
4 _quantize_activations
4 _output_quantizers
5 _trainable_weights
5 _non_trainable_weights
5 _initial_weights
5 _captured_weight_regularizer
5 quantize_config
5 _weight_vars
5 _quantize_activations
5

In [595]:
# Compare the first conv kernel dumped from the TFLite model ('tensor-conv-1.npy')
# with the kernel held by layer 1 of the quantization-aware Keras model.
#
# The earlier version of this cell began with four bare expressions (np.max, np.min,
# __dict__.keys(), _output_quantizers). Their values were discarded because they were
# not the last expression of the cell, so they have been removed.

# Factor applied to the dumped TFLite values before printing.
# Presumably the quantization scale of output channel 0 of that tensor — TODO confirm.
TFLITE_CONV1_SCALE = 0.002201777882874012

tflite_weights = np.load('tensor-conv-1.npy')
print("T Lite Model Values")
# NOTE(review): the transposed TFLite slice prints as one 5x5 block while the Keras
# slice below prints as five 5x1 blocks; going by the recorded values, rows of one
# correspond to columns of the other.
print( TFLITE_CONV1_SCALE*tflite_weights.T[:5,:5,:,0])
print("Q Aware")
print(q_aware_model.layers[1].variables[0][:5,:5,:,0].numpy().reshape(5,5,1))

T Lite Model Values
[[[-0.01541245  0.00880711  0.09247467 -0.11229067  0.019816  ]
  [ 0.01100889  0.15412445  0.20696712  0.12770312 -0.07045689]
  [-0.14531734 -0.27962579 -0.2289849   0.1299049  -0.21797601]
  [ 0.10788712  0.11229067 -0.01761422  0.01761422  0.24219557]
  [ 0.05284267  0.04403556  0.019816   -0.08146578  0.00660533]]]
Q Aware
[[[-0.01472886]
  [ 0.01095581]
  [-0.14489803]
  [ 0.10731277]
  [ 0.05341939]]

 [[ 0.00825399]
  [ 0.15507933]
  [-0.2796258 ]
  [ 0.11221164]
  [ 0.0431769 ]]

 [[ 0.09138181]
  [ 0.20608759]
  [-0.22992744]
  [-0.01802453]
  [ 0.02010153]]

 [[-0.11331234]
  [ 0.12767641]
  [ 0.12989913]
  [ 0.016807  ]
  [-0.08187915]]

 [[ 0.02014185]
  [-0.06991023]
  [-0.21795177]
  [ 0.2419035 ]
  [ 0.00760387]]]


In [569]:
# Fake-quantize the weights of the quantization-aware model in two ways and print a
# 5x5 corner of the results next to the stored weights of layer 11.
#
# NOTE(review): every name assigned inside the loop is overwritten on each pass, so
# the prints after the loop — and any later cell reading min_var, max_var,
# quantized_and_dequantized or new_quantized_and_dequantized — only see the values
# left by the last (layer, weight) pair visited. The prints index those values as
# 2-D and compare against layers[11], so that last pair is presumably the kernel of
# layer 11 — TODO confirm.
bit_width = 8
for i, layer in enumerate(q_aware_model.layers):
    # Only layers exposing a `_weight_vars` attribute are considered.
    if hasattr(layer, '_weight_vars'):
        for weight, quantizer, quantizer_vars in layer._weight_vars:
            # (1) Call the layer's own quantizer in inference mode.
            quantized_and_dequantized = quantizer(weight, training = False, weights = quantizer_vars)
            # print(quantized_and_dequantized)
            min_var = quantizer_vars['min_var']
            max_var = quantizer_vars['max_var']
            # (2) Call TensorFlow's fake-quant op directly with the same min/max,
            # bit_width bits and narrow_range=True.
            new_quantized_and_dequantized = tf.quantization.fake_quant_with_min_max_vars(weight, min_var, max_var, bit_width, narrow_range = True, name = "New_quantized")
            # print(min_var)
            # print(max_var)
            # print(weight)
            # print(type(quantizer))
            # print(type(quantizer_vars))
            # print(layer._weight_vars[0][0])
            # print(layer._weight_vars[0][1])
            # print(layer._weight_vars[0][2])
            # print(type(quantized_and_dequantized))
            # quantized = dequantize(quantize_and_dequantized, min_var, max_var, quantizer)
            # Rescale result (1) so that max_var maps to 2**(bit_width-1)-1 and round.
            # This presumes a range symmetric about zero — TODO confirm.
            quantized = np.round(quantized_and_dequantized / max_var * (2**(bit_width-1)-1))
            # new_quantized = tf.quantization.fake_quant_with_min_max_vars_per_channel(weight, min_var*np.ones(10), max_var*np.ones(10), bit_width, narrow_range = True, name = "New_quantized")
# Values of weight variable and q_aware_model are the same
# print(weight[:5,:5])
print("Quantized model weights")
# First weight array of layer 11 as stored in the model.
print(q_aware_model.layers[11].get_weights()[0][:5,:5])

# print("Quantizer num_bits : ", quantizer.num_bits)
# print("Quantizer per_axis : ", quantizer.per_axis)
# print("Quantizer symmetric : ", quantizer.symmetric)
# print("Quantizer narrow_range : ", quantizer.narrow_range)

# print("Fake Quantized values")
# print(quantized_and_dequantized[:5,:5])
print("Manual Quantized values")
print(quantized[:5,:5])
# tflite_weights = np.load('tensor-dense-3.npy')
# print("T Lite Model Values")
# print(tflite_weights.T[:5,:5])

print("New fake quantized")
print(new_quantized_and_dequantized[:5,:5])
# print(tf.math.reduce_max(new_quantized_and_dequantized))


Quantized model weights
[[ 0.28528264 -0.5762946  -0.19295678 -0.45415172 -0.35194117]
 [ 0.20125131 -0.04038209  0.19163042 -0.22182827 -0.21663167]
 [-0.5341834   0.06868612 -0.14901407  0.10804179  0.19337347]
 [ 0.2388118   0.047177    0.0628055  -0.40097913  0.2185235 ]
 [-0.09959683 -0.06364337  0.26062194 -0.00146686  0.04848435]]
Manual Quantized values
[[ 47. -94. -31. -74. -57.]
 [ 33.  -7.  31. -36. -35.]
 [-87.  11. -24.  18.  32.]
 [ 39.   8.  10. -65.  36.]
 [-16. -10.  43.   0.   8.]]
New fake quantized
tf.Tensor(
[[ 0.28816587 -0.57633173 -0.19006686 -0.453708   -0.34947777]
 [ 0.20232923 -0.04291832  0.19006686 -0.2207228  -0.21459161]
 [-0.5334134   0.06744308 -0.14714853  0.1103614   0.19619805]
 [ 0.23911637  0.04904951  0.06131189 -0.3985273   0.2207228 ]
 [-0.09809902 -0.06131189  0.26364112  0.          0.04904951]], shape=(5, 5), dtype=float32)


In [570]:
def quantize_function(input, min_var, max_var, bits, narrow_range = False):
    """Fake-quantize `input`: clamp it to the adjusted range and snap it to a uniform grid.

    The grid step is ``(max_var - min_var) / (2**bits - 1)``, or
    ``(max_var - min_var) / (2**bits - 2)`` when `narrow_range` is true. The
    range is shifted so its lower end lands on a multiple of the step; values
    outside the shifted range are clamped to it before rounding. The notebook
    uses this as a hand-written counterpart to
    ``tf.quantization.fake_quant_with_min_max_vars``.

    Args:
        input: Array-like of float values to quantize.
        min_var: Scalar lower bound of the quantization range.
        max_var: Scalar upper bound of the quantization range.
        bits: Number of quantization bits.
        narrow_range: If true, one fewer quantization step is used.

    Returns:
        Array of the same shape as `input` whose entries are multiples of the
        step and lie inside the adjusted range.

    Raises:
        ValueError: If `max_var` is not strictly greater than `min_var`.

    Side effects:
        Prints the computed step as ``Scale :  <value>``.
    """
    # Very important
    steps = 2**bits - 2 if narrow_range else 2**bits - 1
    scale = (max_var - min_var) / steps
    # Written as `not scale > 0` so that a NaN range is rejected as well.
    if not scale > 0:
        raise ValueError("max_var must be strictly greater than min_var")

    # Shift the range so both ends sit on multiples of `scale`; the width stays the same.
    min_adj = scale * np.round(min_var / scale)
    max_adj = max_var + min_adj - min_var
    print("Scale : ", scale)

    # Clamp first: without this, values outside the range were returned as grid
    # points beyond it instead of saturating at the range ends.
    clamped = np.clip(input, min_adj, max_adj)
    return scale * np.round(clamped / scale)

# Run the hand-written quantizer on the first weight array of layer 11.
# min_var, max_var and bit_width are not defined in this cell; `.numpy()` is called
# on min_var / max_var, so they are presumably still the TF variables left behind by
# the weight fake-quant loop cell — TODO confirm before re-running out of order.
self_quantized = quantize_function(q_aware_model.layers[11].get_weights()[0], min_var.numpy(), max_var.numpy(), bit_width, narrow_range = True)
print("Self quantized : ")
print(self_quantized[:5,:5])
# Rescale TensorFlow's fake-quant result so that max_var maps to 2**(bit_width-1)-1,
# then round. This presumes a range symmetric about zero — TODO confirm.
new_quantized = np.round(new_quantized_and_dequantized / max_var * (2**(bit_width - 1) - 1))
print("New quantized")
print(new_quantized[:5,:5])

Scale :  0.0061311890759806
Self quantized : 
[[ 0.28816587 -0.57633173 -0.19006686 -0.453708   -0.34947777]
 [ 0.20232923 -0.04291832  0.19006686 -0.2207228  -0.21459161]
 [-0.5334134   0.06744308 -0.14714853  0.1103614   0.19619805]
 [ 0.23911637  0.04904951  0.06131189 -0.3985273   0.2207228 ]
 [-0.09809902 -0.06131189  0.26364112 -0.          0.04904951]]
New quantized
[[ 47. -94. -31. -74. -57.]
 [ 33.  -7.  31. -36. -35.]
 [-87.  11. -24.  18.  32.]
 [ 39.   8.  10. -65.  36.]
 [-16. -10.  43.   0.   8.]]


In [460]:
# Step size implied by the last two variables of layer 11 for an 8-bit,
# full-range (2**bits - 1 steps) quantizer.
bits = 8
# Layer 11 reports 7 variables and the dump of variables[5:] in a later cell lists
# pre_activation_min / pre_activation_max, so [-2] and [-1] are presumably those two.
mi = q_aware_model.layers[11].variables[-2].numpy()
ma = q_aware_model.layers[11].variables[-1].numpy()
scale = (ma - mi) / (2**bits - 1)
# foo = scale * np.round(input / scale)
print(scale)


0.10057997610054764


In [205]:
# Spot-check one element, at index (41, 9), across the three fake-quantized results:
# the layer's own quantizer, TensorFlow's fake-quant op, and quantize_function.
# All three names come from earlier cells.
pos = (41,9)
print(quantized_and_dequantized[pos].numpy())
print(new_quantized_and_dequantized[pos].numpy())
print(self_quantized[pos])


# print("Other variables stored in last layer")
# Variables of layer 11 from index 5 onwards.
print(q_aware_model.layers[11].variables[5:])

-0.15242569
-0.15242569
-0.15242569
[<tf.Variable 'quant_dense_3/pre_activation_min:0' shape=() dtype=float32, numpy=-14.7027>, <tf.Variable 'quant_dense_3/pre_activation_max:0' shape=() dtype=float32, numpy=10.945194>]


In [551]:
# Compute the quantization step for the output range of layer 2.
l = 2
print(q_aware_model.layers[l].name)
# print(len(q_aware_model.layers[l].variables))
print(q_aware_model.layers[l].variables)
bits = 8
narrow_range = False
# Per the variable list printed just above, index 1 is output_min and index 2 is output_max.
# NOTE(review): this rebinds the notebook-level min_var / max_var to NumPy scalars;
# cells that call `.numpy()` on those names will fail if run after this one.
min_var = q_aware_model.layers[l].variables[1].numpy()
max_var = q_aware_model.layers[l].variables[2].numpy()
# print(max(q_aware_model.layers[l].variables[1]).numpy())
# print(min(q_aware_model.layers[l].variables[1]).numpy())
# out = quantize_function(q_aware_model.layers[l].variables[1], min_var, max_var, bits, narrow_range)
# Same step / range-shift arithmetic as in quantize_function, repeated inline.
if not narrow_range:
    scale = (max_var - min_var) / (2**bits - 1)
else:
    scale = (max_var - min_var) / (2**bits - 2)
# min_adj / max_adj are computed but not used further in this cell.
min_adj = scale * round(min_var / scale)
max_adj = max_var + min_adj - min_var
print("Scale : ", scale)
# print("Min adjusted : ", min_adj)
# print("Max adjusted : ", max_adj)
# print("Unadjusted min", min_var)
# print("Unadjusted max", max_var)
# NOTE(review): prints the same value a second time, without the label.
print(scale)

quant_re_lu_9
[<tf.Variable 'quant_re_lu_9/optimizer_step:0' shape=() dtype=int32, numpy=-1>, <tf.Variable 'quant_re_lu_9/output_min:0' shape=() dtype=float32, numpy=-3.9335735>, <tf.Variable 'quant_re_lu_9/output_max:0' shape=() dtype=float32, numpy=4.5417123>]
Scale :  0.0332364138434915
0.0332364138434915


In [278]:
# List the attribute names of the quantization-aware model: first those without a
# leading underscore, then those with one.
print("Elements of q aware model")
print([elem for elem in dir(q_aware_model) if not elem.startswith('_')])
print([elem for elem in dir(q_aware_model) if elem.startswith('_')])


Elements of q aware model
['activity_regularizer', 'add', 'add_loss', 'add_metric', 'add_update', 'add_variable', 'add_weight', 'build', 'built', 'call', 'compile', 'compiled_loss', 'compiled_metrics', 'compute_dtype', 'compute_loss', 'compute_mask', 'compute_metrics', 'compute_output_shape', 'compute_output_signature', 'count_params', 'distribute_reduction_method', 'distribute_strategy', 'dtype', 'dtype_policy', 'dynamic', 'evaluate', 'evaluate_generator', 'finalize_state', 'fit', 'fit_generator', 'from_config', 'get_config', 'get_input_at', 'get_input_mask_at', 'get_input_shape_at', 'get_layer', 'get_metrics_result', 'get_output_at', 'get_output_mask_at', 'get_output_shape_at', 'get_weight_paths', 'get_weights', 'history', 'inbound_nodes', 'input', 'input_mask', 'input_names', 'input_shape', 'input_spec', 'inputs', 'layers', 'load_weights', 'loss', 'losses', 'make_predict_function', 'make_test_function', 'make_train_function', 'metrics', 'metrics_names', 'name', 'name_scope', 'non_tr

In [279]:
# Same attribute listing as for the whole model, applied to the single layer at index l.
l = 11
print("Elements of layer ", l, "of q aware model")
print([elem for elem in dir(q_aware_model.layers[l]) if not elem.startswith('_')])
print([elem for elem in dir(q_aware_model.layers[l]) if elem.startswith('_')])

Elements of layer  11 of q aware model
['activity_regularizer', 'add_loss', 'add_metric', 'add_update', 'add_variable', 'add_weight', 'build', 'built', 'call', 'compute_dtype', 'compute_mask', 'compute_output_shape', 'compute_output_signature', 'count_params', 'dtype', 'dtype_policy', 'dynamic', 'finalize_state', 'from_config', 'get_config', 'get_input_at', 'get_input_mask_at', 'get_input_shape_at', 'get_output_at', 'get_output_mask_at', 'get_output_shape_at', 'get_weights', 'inbound_nodes', 'input', 'input_mask', 'input_shape', 'input_spec', 'layer', 'losses', 'metrics', 'name', 'name_scope', 'non_trainable_variables', 'non_trainable_weights', 'optimizer_step', 'outbound_nodes', 'output', 'output_mask', 'output_shape', 'quantize_config', 'set_weights', 'stateful', 'submodules', 'supports_masking', 'trainable', 'trainable_variables', 'trainable_weights', 'updates', 'variable_dtype', 'variables', 'weights', 'with_name_scope']
['_TF_MODULE_IGNORED_PROPERTIES', '__annotations__', '__call_

In [461]:
# Print the name and the get_config() dictionary of the layer at index l.
l = 11
print(q_aware_model.layers[l].name)
print(q_aware_model.layers[l].get_config())

quant_dense_3
{'name': 'quant_dense_3', 'trainable': True, 'dtype': 'float32', 'layer': {'class_name': 'Dense', 'config': {'name': 'dense_3', 'trainable': True, 'dtype': 'float32', 'units': 10, 'activation': {'class_name': 'QuantizeAwareActivation', 'config': {'activation': 'softmax'}}, 'use_bias': True, 'kernel_initializer': {'class_name': 'GlorotUniform', 'config': {'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}}, 'quantize_config': {'class_name': 'Default8BitQuantizeConfig', 'config': {'weight_attrs': ['kernel'], 'activation_attrs': ['activation'], 'quantize_output': False}}}


In [277]:
# Inspect the quantize_config object attached to the layer at index l:
# its attribute names, its type, its config dict and its activation quantizer.
l = 11
print(dir(q_aware_model.layers[l].quantize_config))
print(type(q_aware_model.layers[l].quantize_config))
# NOTE(review): `conf` is not read again in this cell. Judging by the class path
# printed by type() above, `default_8bit` looks like a package rather than a class,
# so the annotation is probably not a real type — TODO confirm.
conf : tfmot.quantization.keras.default_8bit = q_aware_model.layers[l].quantize_config
# NOTE(review): this label is printed after the type itself has already been printed.
print("Type")
print(q_aware_model.layers[l].quantize_config.get_config())
print(q_aware_model.layers[l].quantize_config.activation_quantizer)

['__abstractmethods__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', 'activation_attrs', 'activation_quantizer', 'from_config', 'get_activations_and_quantizers', 'get_config', 'get_output_quantizers', 'get_weights_and_quantizers', 'quantize_output', 'set_quantize_activations', 'set_quantize_weights', 'weight_attrs', 'weight_quantizer']
<class 'tensorflow_model_optimization.python.core.quantization.keras.default_8bit.default_8bit_quantize_registry.Default8BitQuantizeConfig'>
Type
{'weight_attrs': ['kernel'], 'activation_attrs': ['activation'], 'quantize_output': False}
<tensorflow_model_optimization.python.core.quantization.keras.quantizers.MovingAverageQuantizer object at 0x00000

In [466]:
# Print the name of layer 1 and the last nine entries of its second variable.
# Per the variable dump in an earlier cell, that variable is the conv bias.
l = 1
print(q_aware_model.layers[l].name)
# print(q_aware_model.layers[l].quantize_config.get_weights_and_quantizers(model.layers[l]))
print(q_aware_model.layers[l].variables[1][-9:])

quant_conv2d_9
tf.Tensor(
[-0.2834741  -0.0156017   0.03915993  0.15401286 -0.0032179   0.01683025
 -0.00186775 -0.01584737 -0.21884577], shape=(9,), dtype=float32)


In [527]:
# Conversion to TF Lite model
# Build a converter from the quantization-aware Keras model and enable the DEFAULT
# optimization set. convert() returns the serialized model, which later cells hand to
# tf.lite.Interpreter as `model_content`.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
    
quantized_tflite_model = converter.convert()



INFO:tensorflow:Assets written to: C:\Users\rosal\AppData\Local\Temp\tmpgeabsba2\assets


INFO:tensorflow:Assets written to: C:\Users\rosal\AppData\Local\Temp\tmpgeabsba2\assets


In [528]:
def evaluate_model(interpreter: "tf.lite.Interpreter", images=None, labels=None):
    """Measure the top-1 accuracy of a TFLite interpreter, one image at a time.

    Args:
        interpreter: TFLite interpreter to run. This function does not call
            ``allocate_tensors()``; the caller must have done so already.
        images: Sequence/array of images. Each one is cast to float32 and
            given a leading batch axis; 2-D images also get a trailing channel
            axis. Defaults to the notebook-level ``test_images``.
        labels: Ground-truth class indices, one per image. Defaults to the
            notebook-level ``test_labels``.

    Returns:
        Fraction of images whose arg-max output equals the matching label.

    Raises:
        ValueError: If `images` and `labels` differ in length.

    Side effects:
        Prints a progress line every 1000 images and a blank separator at the end.
    """
    # Fall back to the notebook globals so existing single-argument calls still work.
    if images is None:
        images = test_images
    if labels is None:
        labels = test_labels
    labels = np.asarray(labels)
    if len(images) != len(labels):
        raise ValueError(
            "images and labels must have the same length, got {} and {}".format(
                len(images), len(labels)))

    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    # Run predictions on every image, one invoke() per image.
    prediction_digits = []
    for i, image in enumerate(images):
        if i % 1000 == 0:
            print('Evaluated on {n} results so far.'.format(n=i))
        # Pre-processing: convert to float32, add the channel axis when the
        # image is a plain 2-D array, then add the batch axis.
        batch = np.asarray(image, dtype=np.float32)
        if batch.ndim == 2:
            batch = batch[..., np.newaxis]
        batch = batch[np.newaxis, ...]
        interpreter.set_tensor(input_index, batch)

        # Run inference.
        interpreter.invoke()

        # Post-processing: drop the batch dimension and take the class with the
        # highest score. get_tensor() is used, as in the single-image cell, so
        # no handle to the interpreter's internal buffer is kept.
        scores = interpreter.get_tensor(output_index)
        prediction_digits.append(np.argmax(scores[0]))

    print('\n')
    # Compare predictions with ground truth labels to calculate accuracy.
    prediction_digits = np.array(prediction_digits)
    accuracy = (prediction_digits == labels).mean()
    return accuracy

In [529]:
# Create an interpreter from the in-memory converted model and allocate its tensors
# before anything is fed into it.
interpreter = tf.lite.Interpreter(model_content = quantized_tflite_model)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("Input Shape: ", input_details[0]['shape'])
print("Output Shape: ", output_details[0]['shape'])

# Accuracy of the TFLite model, computed image by image by evaluate_model.
test_accuracy = evaluate_model(interpreter)

# Print it next to the Keras quantization-aware accuracy from an earlier evaluate cell.
print('Quant TFLite test_accuracy:', test_accuracy)
print('Quant TF test accuracy:', q_aware_test_acc)

Input Shape:  [ 1 28 28  1]
Output Shape:  [ 1 10]
Evaluated on 0 results so far.
Evaluated on 1000 results so far.
Evaluated on 2000 results so far.
Evaluated on 3000 results so far.
Evaluated on 4000 results so far.
Evaluated on 5000 results so far.
Evaluated on 6000 results so far.
Evaluated on 7000 results so far.
Evaluated on 8000 results so far.
Evaluated on 9000 results so far.


Quant TFLite test_accuracy: 0.908
Quant TF test accuracy: 0.9082000255584717


In [530]:
# Target path for the converted TFLite model.
save_dir = "./logs/"
quant_file = 'quant_model_q3.tflite'
save_path = save_dir + quant_file
# NOTE(review): the write below is commented out, so this cell only rebinds
# save_dir / quant_file / save_path and no file is produced.
# with open(save_path, 'wb') as f:
#   f.write(quantized_tflite_model)

In [531]:
# Run the TFLite interpreter on a single test image and compare with its label.
ind_index = 10
test_image = test_images[ind_index]
# Add a leading batch axis and a trailing channel axis, casting to float32.
test_image = np.expand_dims(test_image, axis = 0).astype(np.float32)
test_image = np.expand_dims(test_image, axis = 3).astype(np.float32)

interpreter.set_tensor(input_details[0]['index'], test_image)
interpreter.invoke()
output_array = interpreter.get_tensor(output_details[0]['index'])
# print(output_array.shape)
# Predicted class is the arg-max of the first output row; its value is printed as a percentage.
digit = np.argmax(output_array[0])
probability = max(output_array[0])
# "Input index" is the ground-truth label and "Output index" the predicted class.
print("Input index : ", test_labels[ind_index])
print("Output index : ", digit, "{0:.2%}".format(probability))


Input index :  4
Output index :  4 99.61%


In [16]:
# # interpreter.allocate_tensors()
# tensor_details = interpreter.get_tensor_details()

# for dict in tensor_details: 
#     i = dict['index']
#     tensor_name = dict['name']
#     scales = dict['quantization_parameters']['scales']
#     zero_points = dict['quantization_parameters']['zero_points']
#     tensor = interpreter.tensor(i)()
    
#     print(i, tensor_name, scales.shape, zero_points.shape, tensor.shape)

0 serving_default_conv2d_input:0 (0,) (0,) (1, 28, 28, 1)
1 sequential/quant_flatten/Const (0,) (0,) (2,)
2 sequential/quant_conv2d/BiasAdd/ReadVariableOp (32,) (32,) (32,)
3 sequential/quant_conv2d_1/BiasAdd/ReadVariableOp (64,) (64,) (64,)
4 sequential/quant_conv2d_2/BiasAdd/ReadVariableOp (96,) (96,) (96,)
5 sequential/quant_conv2d/Conv2D (32,) (32,) (32, 5, 5, 1)
6 sequential/quant_conv2d_1/Conv2D (64,) (64,) (64, 5, 5, 32)
7 sequential/quant_conv2d_2/Conv2D (96,) (96,) (96, 3, 3, 64)
8 sequential/quant_dense/BiasAdd/ReadVariableOp (1,) (1,) (10,)


ValueError: Tensor data is null. Run allocate_tensors() first