In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from typing import Tuple, List
from collections import OrderedDict

# Fetch the Fashion-MNIST train/test split through Keras, then divide both
# image arrays by 255.0 (new arrays are bound to the same names).
(train_images, train_labels), (test_images, test_labels) = (
    tf.keras.datasets.fashion_mnist.load_data()
)
train_images, test_images = train_images / 255.0, test_images / 255.0

In [3]:
def evaluate_model(interpreter: tf.lite.Interpreter, test_images, test_labels) -> Tuple[float, float]:
    """ Evaluate TFLite Model:
    -
    Runs every image through the interpreter, one image per invocation, and
    returns a tuple of loss and accuracy.
    - interpreter: TFLite interpreter. This function never calls
      `allocate_tensors()`, so the caller is presumably expected to have done it.
    - test_images: iterable of images. Each image gets a new axis 0 and a new
      axis 3 and is cast to float32 before being fed to the interpreter.
    - test_labels: ground truth class indices, compared element-wise with the
      arg-max of every output vector.
    - Returns (loss, accuracy): sparse categorical cross-entropy (loss object
      built with default arguments) and the fraction of correct predictions.
    """
    input_tensor_index = interpreter.get_input_details()[0]["index"]
    output_tensor_index = interpreter.get_output_details()[0]["index"]
    # Accessor for the output tensor; calling it returns the current contents.
    read_output = interpreter.tensor(output_tensor_index)

    predicted_classes = []
    class_scores = []
    for image in test_images:
        # Pre-processing: insert axis 0 and axis 3, then convert to float32.
        model_input = np.expand_dims(np.expand_dims(image, axis = 0), axis = 3).astype(np.float32)
        interpreter.set_tensor(input_tensor_index, model_input)

        # Run inference.
        interpreter.invoke()

        # Post-processing: copy the first output row and keep its arg-max.
        scores = np.copy(read_output()[0])
        class_scores.append(scores)
        predicted_classes.append(np.argmax(scores))

    # Compare prediction results with ground truth labels.
    predicted_classes = np.array(predicted_classes)
    class_scores = np.array(class_scores)
    loss = tf.keras.losses.SparseCategoricalCrossentropy()(test_labels, class_scores).numpy()
    accuracy = (predicted_classes == test_labels).mean()
    return loss, accuracy

def random_bit_flipper_uniform(value : int) -> Tuple[int, int]:
    """ Random bit flipper with uniform distribution
    -
    Obtains a value and flips exactly one of its 8 bits. The bit position is
    drawn with `np.random.randint(8)`, so every position (0 = LSB, 7 = MSB) is
    equally likely.
    - `value` is read as a signed 8 bit two's complement number (-128..127).
      Raw byte patterns 128..255 are accepted as well and are treated as the
      bit pattern they represent.
    - Python ints, numpy integer scalars and integral floats (e.g. the output
      of `np.round`) are accepted; the value is converted to a Python int first
      so that fixed-width numpy arithmetic cannot overflow.
    - Returns `(bit_pos, flipped_value)` where `flipped_value` is the signed
      8 bit interpretation (-128..127) of the flipped bit pattern.
    - Raises `ValueError` when `value` is not integral or does not fit in 8 bits.
    """
    int_value = int(value)
    if int_value != value:
        raise ValueError("value must be integral, got {!r}".format(value))
    if not -128 <= int_value <= 255:
        raise ValueError("value must fit in 8 bits, got {!r}".format(value))

    bit_pos = int(np.random.randint(8))
    # Two's complement bit pattern of the value (0..255).
    bit_pattern = int_value & 0xFF
    flipped_pattern = bit_pattern ^ (1 << bit_pos)
    # Patterns with the sign bit set map back to negative numbers.
    flipped_value = flipped_pattern - 256 if flipped_pattern >= 128 else flipped_pattern
    return bit_pos, flipped_value

""" Parameters to be tuned:
- Output file name, if you don't update the name manually the previous file won't be deleted. New data will be appended to the end of the file instead.
- Flag that enables training data to be saved, a False flag will decrease running time significantly.
- Flag that enables laplacian related data to be saved.
- Number of simulations per layer.
- Total number of bits that will be flipped randomly from any weight in each layer.
"""
# --- Tunable simulation parameters ---
SAVE_FILE_NAME = 'Performance_Multiple_4.csv'
SAVE_TRAINING_PERFORMANCE_FLAG = False
SAVE_LAPLACIAN_DATA_FLAG = True
N_SIMULATIONS_PER_LAYER = 2
N_BITS_TO_FLIP = 20

# --- Directories (both end with a slash, file names are appended directly) ---
MODELS_DIR = "./model/"
OUTPUTS_DIR = "./outputs/"

# --- Paths derived from the directories above ---
LOAD_PATH_Q_AWARE = f"{MODELS_DIR}model_q_aware_final_01"
LOAD_TFLITE_PATH = f"{MODELS_DIR}tflite_final_01.tflite"
SAVE_NEW_TFLITE_PATH = f"{MODELS_DIR}new_tflite_flip_01.tflite"
SAVE_DATA_PATH = f"{OUTPUTS_DIR}{SAVE_FILE_NAME}"

In [4]:
# Make sure the output directory exists. `exist_ok=True` removes the
# check-then-create race of `os.path.exists` + `os.mkdir`, and `makedirs`
# also creates missing parent directories if OUTPUTS_DIR is ever nested.
os.makedirs(OUTPUTS_DIR, exist_ok=True)

In [5]:
# Load Q Aware model (the load happens inside tfmot's quantize_scope context)
with tfmot.quantization.keras.quantize_scope():
    q_aware_model : tf.keras.Model = tf.keras.models.load_model(LOAD_PATH_Q_AWARE)
# Load TFLite model from its file path
interpreter = tf.lite.Interpreter(model_path = LOAD_TFLITE_PATH)

In [6]:
# Evaluate accuracy of both models in test set
# 1) Quantization aware Keras model, evaluated through Keras itself.
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
print('Q Aware model test accuracy : ', format(q_aware_test_acc, "0.3%"))
print('Q Aware model test loss: ', q_aware_test_loss)

# 2) TFLite model: tensors are allocated first, then every test image is run
#    through the interpreter by evaluate_model.
interpreter.allocate_tensors()
tflite_test_loss, tflite_test_accuracy = evaluate_model(interpreter, test_images, test_labels)
print('TFLite model test accuracy:', format(tflite_test_accuracy, "0.3%"))
print('TFLite model test loss: ', tflite_test_loss)

Q Aware model test accuracy :  91.150%
Q Aware model test loss:  0.2890910804271698
TFLite model test accuracy: 91.170%
TFLite model test loss:  0.3396423


In [18]:
# Quantification of values
BIT_WIDTH = 8
# Per weight key: output of the layer's quantizer, and its integer-valued version.
quantized_and_dequantized = OrderedDict()
quantized = OrderedDict()
# Parallel lists, one entry per quantized weight: index of the owning layer,
# weight key and weight shape.
layer_index_list, keys_list, layers_shapes = [], [], []

layer : tfmot.quantization.keras.QuantizeWrapperV2
for i, layer in enumerate(q_aware_model.layers):
    quantizer : tfmot.quantization.keras.quantizers.Quantizer
    weight : tf.Variable
    # Only layers exposing `_weight_vars` carry quantized weights.
    if not hasattr(layer, '_weight_vars'):
        continue
    for weight, quantizer, quantizer_vars in layer._weight_vars:
        min_var = quantizer_vars['min_var']
        max_var = quantizer_vars['max_var']

        # Variable name without its last two characters (e.g. "conv2d/kernel").
        key = weight.name[:-2]
        keys_list.append(key)
        layer_index_list.append(i)
        layers_shapes.append(weight.numpy().shape)

        quantized_and_dequantized[key] = quantizer(
            inputs = weight,
            training = False,
            weights = quantizer_vars,
        )
        # Rescale by max_var so that max_var maps to 2**(BIT_WIDTH-1)-1, then round.
        quantized[key] = np.round(
            quantized_and_dequantized[key] / max_var * (2**(BIT_WIDTH-1)-1)
        )

# For every quantized weight key: pick a random position inside the weight,
# look up the layer's min/max quantization variables and print the slice of
# the kernel that contains that position, both as quantizer output and as
# rounded integer values.
for key in keys_list:
    print(key)
    # Position of this key in the parallel lists (first occurrence).
    kernel_idx = keys_list.index(key)
    layer_index = layer_index_list[kernel_idx]

    # Non-trainable variables of the layer whose name contains the weight key,
    # indexed by variable name. The enumerate index `i` is not used.
    m_vars = {variable.name: variable for i, variable in enumerate(q_aware_model.layers[layer_index].non_trainable_variables) if keys_list[kernel_idx] in variable.name}
    # First variable name containing "min" / "max". The generator's own `key`
    # is local to the generator and does not rebind the loop variable.
    min_key = list(key for key in m_vars if "min" in key)[0]
    max_key = list(key for key in m_vars if "max" in key)[0]
    # Random position for weight change and max min variables identification
    if "dense" not in key:
        # It is a convolutional layer
        # One random index per dimension of the 4-entry shape.
        kernel_row = np.random.randint(0, layers_shapes[kernel_idx][0])
        kernel_column = np.random.randint(0, layers_shapes[kernel_idx][1])
        in_channel = np.random.randint(0, layers_shapes[kernel_idx][2])
        out_channel = np.random.randint(0, layers_shapes[kernel_idx][3])
        position = (kernel_row, kernel_column, in_channel, out_channel)
        # Full 2D kernel for the chosen (in_channel, out_channel) pair.
        kernel_position = (slice(None), slice(None), in_channel, out_channel)
        value_position = (kernel_row, kernel_column)
        # Convolutional layers max is divided per channels
        min_var = m_vars[min_key][out_channel].numpy()
        max_var = m_vars[max_key][out_channel].numpy()
    else:
        # It is a fully connected layer
        kernel_row = None
        kernel_column = None
        in_channel = np.random.randint(0, layers_shapes[kernel_idx][0])
        out_channel = np.random.randint(0, layers_shapes[kernel_idx][1])
        position = (in_channel, out_channel)
        # All weights of the chosen output column.
        kernel_position = (slice(None), out_channel)
        # Parentheses without a trailing comma: this is a plain int, not a tuple.
        value_position = (in_channel)
        # Fully connected layer has only 1 max value for the kernel
        min_var = m_vars[min_key].numpy()
        max_var = m_vars[max_key].numpy()

    # NOTE(review): position, value_position, min_var and max_var are assigned
    # above but not read again inside this loop.
    print(quantized_and_dequantized[key][kernel_position].numpy())
    print(quantized[key][kernel_position])

conv2d/kernel
[[-0.26310202  0.00828668  0.02278836  0.25688702  0.10358348]
 [-0.24652867 -0.15951855  0.1698769   0.19473694 -0.06422175]
 [ 0.25688702 -0.10565515 -0.07043677 -0.13465852 -0.01243002]
 [ 0.12637185  0.02278836  0.1636619  -0.1905936  -0.08079511]
 [-0.07250843  0.02486004  0.16780524 -0.00414334 -0.11808517]]
[[-127.    4.   11.  124.   50.]
 [-119.  -77.   82.   94.  -31.]
 [ 124.  -51.  -34.  -65.   -6.]
 [  61.   11.   79.  -92.  -39.]
 [ -35.   12.   81.   -2.  -57.]]
conv2d_1/kernel
[[-0.05953331 -0.01488333  0.08929997  0.07069581  0.02232499]
 [ 0.16371661 -0.04092915  0.21580826  0.10046247  0.04837082]
 [ 0.11534579 -0.06697498  0.1041833   0.00372083  0.1748791 ]
 [ 0.10790413  0.01860416 -0.01860416 -0.10046247 -0.0111625 ]
 [-0.10046247  0.02976666 -0.07441664 -0.14511245 -0.18232077]]
[[-16.  -4.  24.  19.   6.]
 [ 44. -11.  58.  27.  13.]
 [ 31. -18.  28.   1.  47.]
 [ 29.   5.  -5. -27.  -3.]
 [-27.   8. -20. -39. -49.]]
conv2d_2/kernel
[[-0.05118326  

In [44]:
# Test code
# Inspects a single kernel of one quantized layer: prints its shape, min/max
# quantization variables, trainable kernel slice and bias, the quantizer output
# and the rounded integer values, then a bias rebuilt from hardcoded constants.
# Index into keys_list of the weight to inspect.
LAYER_TARGET = 0
key = keys_list[LAYER_TARGET]
kernel_idx = keys_list.index(key)
layer_index = layer_index_list[kernel_idx]

# Non-trainable variables of the layer whose name contains the weight key,
# indexed by variable name; then the first names containing "min" / "max".
m_vars = {variable.name: variable for i, variable in enumerate(q_aware_model.layers[layer_index].non_trainable_variables) if keys_list[kernel_idx] in variable.name}
min_key = list(key for key in m_vars if "min" in key)[0]
max_key = list(key for key in m_vars if "max" in key)[0]

# Row and column are drawn at random before the layer type is known; they are
# only used in the convolutional branch below. The channels are fixed to 0.
kernel_row = np.random.randint(0, layers_shapes[kernel_idx][0])
kernel_column = np.random.randint(0, layers_shapes[kernel_idx][1])
in_channel = 0
out_channel = 0

if "dense" not in key:
    # It is a convolutional layer
    position = (kernel_row, kernel_column, in_channel, out_channel)
    # Full 2D kernel for the chosen (in_channel, out_channel) pair.
    kernel_position = (slice(None), slice(None), in_channel, out_channel)
    value_position = (kernel_row, kernel_column)
    # Convolutional layers max is divided per channels
    min_var = m_vars[min_key][out_channel].numpy()
    max_var = m_vars[max_key][out_channel].numpy()
else:
    # It is a fully connected layer
    position = (in_channel, out_channel)
    kernel_position = (slice(None), out_channel)
    # Parentheses without a trailing comma: this is a plain int, not a tuple.
    value_position = (in_channel)
    # Fully connected layer has only 1 max value for the kernel
    min_var = m_vars[min_key].numpy()
    max_var = m_vars[max_key].numpy()
    
# Positions inside `trainable_variables`; presumably kernel first and bias
# second, as the constant names suggest - TODO confirm for every layer type.
T_VARIABLES_KERNEL_INDEX = 0
T_VARIABLES_BIAS_INDEX = 1
print(layers_shapes[kernel_idx])
print(min_var)
print(max_var)
print(q_aware_model.layers[layer_index].trainable_variables[T_VARIABLES_KERNEL_INDEX][kernel_position].numpy())
print(q_aware_model.layers[layer_index].trainable_variables[T_VARIABLES_BIAS_INDEX][out_channel].numpy())
print(quantized_and_dequantized[key][kernel_position].numpy())
print(quantized[key][kernel_position])
# From TFLite model
# Hardcoded values; the printed product QUANTIZED_BIAS * SCALE_BIAS can be
# compared with the bias printed above.
# NOTE(review): these constants presumably belong to the layer/channel selected
# above and need updating by hand when LAYER_TARGET changes - confirm.
QUANTIZED_BIAS = 2300
SCALE_BIAS = 0.000012908661119581666
false_quantized_bias = QUANTIZED_BIAS*SCALE_BIAS
print(false_quantized_bias)
# ZERO and OUTPUT_SCALE are assigned here but not read within this cell.
ZERO = 7
OUTPUT_SCALE = 0.03260320425033569


(5, 5, 1, 32)
-0.41804698
0.41804698
[[-0.11190058  0.02968932  0.13516179 -0.21755771 -0.41798258]
 [-0.23117375  0.02469435  0.18612708  0.08809777 -0.24172066]
 [ 0.01567534  0.130002   -0.09754552 -0.05695752  0.10985629]
 [ 0.15383062 -0.07598737 -0.00296063 -0.07511681 -0.0047392 ]
 [ 0.12786986  0.0541554  -0.06213173  0.0122421   0.03846057]]
0.029690962
[[-0.11191808  0.02962537  0.13496004 -0.21725276 -0.41804698]
 [-0.23041959  0.02633367  0.18762738  0.08887613 -0.24029471]
 [ 0.01645854  0.12837663 -0.09875125 -0.05595904  0.10862638]
 [ 0.1547103  -0.07570929 -0.00329171 -0.07570929 -0.00329171]
 [ 0.12837663  0.05266733 -0.06254246  0.01316683  0.0395005 ]]
[[ -34.    9.   41.  -66. -127.]
 [ -70.    8.   57.   27.  -73.]
 [   5.   39.  -30.  -17.   33.]
 [  47.  -23.   -1.  -23.   -1.]
 [  39.   16.  -19.    4.   12.]]
0.02968992057503783


In [12]:
# Index of the layer whose quantization setup is inspected.
l = 2
# Print the layer's quantize config dictionary and, on the next line, its
# activation quantizer object.
print(
    q_aware_model.layers[l].quantize_config.get_config(),
    q_aware_model.layers[l].quantize_config.activation_quantizer,
    sep = "\n",
)

{'weight_attrs': ['kernel'], 'activation_attrs': ['activation'], 'quantize_output': False}
<tensorflow_model_optimization.python.core.quantization.keras.quantizers.MovingAverageQuantizer object at 0x000001677C8220E0>
