In [None]:
import tensorflow as tf
import numpy as np
import os

## Loading Test Data

In [None]:
# Loading Data
(_, _), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
# Adding Channel Lenght Dimension: Expanding from (28x28) to (28x28x1)
x_test = tf.expand_dims(x_test, -1)

## Creating Dataset Objects

In [None]:
# Normalize images
def normalize_img(x_, y_):
    return tf.cast(x_, tf.float32) / 255., y_

# 1-hot encoding
def to_categorical(x_, y_):
    return x_, tf.one_hot(y_, depth=10)


test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.map(normalize_img)
test_dataset = test_dataset.map(to_categorical)
test_dataset = test_dataset.batch(1)

## Quantization Layer

In [None]:
def Quantization_layer(tensor, Quantization = True,signed = True, word_size = 12, frac_size = 6):
    
    factor = 2.0**frac_size
    
    # Quantized max and min values, in case of the need to implement overflow cases.
    #if signed:
    #    Max_Qvalue = ((1 << (word_size-1)) - 1)/factor
    #    Min_Qvalue = -Max_Qvalue - 1
    #else:
    #    Max_Qvalue = ((1 << (word_size)) - 1)/factor
    #    Min_Qvalue = 0
    
    if Quantization:
        return tf.round(tensor*factor) / factor             #Quantization, assuming no overflow
    else:
        return tensor                                       #Simple Bypass

## Creating Lenet Model

Nota: ademas de cuantizarse las entradas, se agrega una capa de cuantizacion luego de cada capa que realice cambios sobre los datos (Conv,Dense,Activacion)

In [None]:
def build_model(input_layer, Quantization = True, signed = True, word_size = 12, frac_size = 6 ):
    Arguments = {'Quantization':Quantization, 'signed':signed, 'word_size':word_size, 'frac_size':frac_size}
    QInp      = tf.keras.layers.Lambda(Quantization_layer, name="QInp",  arguments = Arguments )(input_layer)
    #Conv Block
    Conv1   = tf.keras.layers.Conv2D(6, kernel_size=5, strides=1, input_shape=(28,28,1), padding='same', name= 'Conv1')(QInp)
    QConv1  = tf.keras.layers.Lambda(Quantization_layer, name="QConv1",  arguments = Arguments )(Conv1)
    Act1    = tf.keras.activations.tanh(QConv1)
    QAct1   = tf.keras.layers.Lambda(Quantization_layer, name="QAct1",   arguments = Arguments )(Act1)
    AvgPool1= tf.keras.layers.AveragePooling2D(name='AvgPool1')(QAct1)
    #Conv Block
    Conv2   = tf.keras.layers.Conv2D(16, kernel_size=5, strides=1, padding='valid',name='Conv2')(AvgPool1)
    QConv2  = tf.keras.layers.Lambda(Quantization_layer, name="QConv2",  arguments = Arguments )(Conv2)
    Act2    = tf.keras.activations.tanh(QConv2)
    QAct2   = tf.keras.layers.Lambda(Quantization_layer, name="QAct2",   arguments = Arguments )(Act2)
    AvgPool2= tf.keras.layers.AveragePooling2D(name='AvgPool2')(QAct2)
    Flatten = tf.keras.layers.Flatten(name='Flatten')(AvgPool2)
    #Dense Block
    Dense1  = tf.keras.layers.Dense(units=120, name='Dense1')(Flatten)
    QDense1 = tf.keras.layers.Lambda(Quantization_layer, name="QDense1", arguments = Arguments )(Dense1)
    Act3    = tf.keras.activations.tanh(QDense1)
    QAct3   = tf.keras.layers.Lambda(Quantization_layer, name="QAct3",   arguments = Arguments )(Act3)
    #Dense Block
    Dense2  = tf.keras.layers.Dense(units=84, name='Dense2')(QAct3)
    QDense2 = tf.keras.layers.Lambda(Quantization_layer, name="QDense2", arguments = Arguments)(Dense2)
    Act4    = tf.keras.activations.tanh(QDense2)
    QAct4   = tf.keras.layers.Lambda(Quantization_layer, name="QAct4",   arguments = Arguments)(Act4)
    #Output Block
    Out     = tf.keras.layers.Dense(units=10,name='Output')(QAct4)
    QOut    = tf.keras.layers.Lambda(Quantization_layer, name="QOut",    arguments = Arguments)(Out)
    Act5    = tf.keras.activations.softmax(QOut)
    QAct5   = tf.keras.layers.Lambda(Quantization_layer, name="QSoftmax",arguments = Arguments)(Act5)
    
    return QAct5

## Loading Quantized model and Non Quantized model 

In [None]:

input_layer   = tf.keras.Input((28, 28, 1))
output_layer  = build_model(input_layer, Quantization = False)

#For this example we using 3 bits of precision.
Qinput_layer  = tf.keras.Input((28, 28, 1))
Qoutput_layer = build_model(Qinput_layer, Quantization = True, word_size = 12, frac_size = 3)

Lenet  = tf.keras.Model(inputs=input_layer, outputs=output_layer)
QLenet = tf.keras.Model(inputs=Qinput_layer, outputs=Qoutput_layer)

## Loading Pretrained Weights

In [None]:
# Loading Wieghts
cwd = os.getcwd()
Wgt_dir = os.path.join(cwd,'TrainedWeights')
Wgt_dir = os.path.join(Wgt_dir,'Weights')

Lenet.load_weights(Wgt_dir)
QLenet.load_weights(Wgt_dir)
# Visualize Lenet 5 Architecture
#lenet.summary()

# Visualize initialized weights
#lenet.weights

## Quantize Weights

In [None]:
def Quantization(List, Quantization = True, signed = True, word_size = 12, frac_size = 6):
    factor = 2.0**frac_size
    return tf.round(np.array(List)*factor) / factor             #Quantization, assuming no overflow

for layer in QLenet.layers:
    weights = layer.get_weights()
    if weights:                     # Layer with weights
        # Quantization of Weights and Bias 
        Qweights    = [None,None]
        Qweights[0] = Quantization(weights[0], word_size = 12, frac_size = 3)
        Qweights[1] = Quantization(weights[1], word_size = 12, frac_size = 3)
        layer.set_weights(Qweights)

## Checking Model predictions

In [None]:
import time
import matplotlib.pyplot as plt

In [None]:
# Iterator over test Dataset
iterator  = iter(test_dataset)

In [None]:
# Getting new image from iterator
image     = next(iterator)
image_plt = image[0][0,...,0]

In [None]:
# Plotting Test image
plt.imshow(image_plt, cmap='gray')

In [160]:
# Target
tf.print("Target:",np.argmax(image[1]))
# Predicted Output
print("Prediction:",np.argmax(Lenet.predict(image[0])))
# Quantized Predicted Output
print("QPrediction:",np.argmax(QLenet.predict(image[0])))

Target: 9
Prediction: 9
QPrediction: 9


## Comparation of Output

In [183]:
Lenet.predict(image[0])

array([[9.4945015e-08, 7.8437586e-07, 4.5743948e-07, 2.4961669e-06,
        1.6386039e-06, 1.5448232e-03, 8.7722611e-07, 5.2541746e-03,
        1.1379937e-05, 9.9318331e-01]], dtype=float32)

In [184]:
QLenet.predict(image[0])

array([[0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.125, 0.   ,
        0.75 ]], dtype=float32)

#### Classes

0. T-shirt/top
1. Trouser
2. Pullover
3. Dress
4. Coat
5. Sandal
6. Shirt
7. Sneaker
8. Bag
9. Ankle boot

## Check the general Accuracy

In [None]:
# Optimization params
# -------------------

# Loss
loss = tf.keras.losses.CategoricalCrossentropy()

# learning rate
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# -------------------

# Validation metrics
# ------------------

metrics = ['accuracy']
# ------------------

# Compile Model
Lenet.compile(optimizer=optimizer, loss=loss, metrics=metrics)
QLenet.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [138]:
Lenet.evaluate(test_dataset)



[0.3196016080860579, 0.8883]

In [163]:
QLenet.evaluate(test_dataset)



[0.6948502577155831, 0.8729]

Notice how even with low precicion of the fractional (3 bits) the network accuracy is almost unaffected.

## Checking The Output of Each Layer

In [None]:
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.backend import eager_learning_phase_scope

# Function to get outputs from each layer.
def get_all_outputs(model, input_data, learning_phase=False):
    outputs = [layer.output for layer in model.layers] # exclude Input
    layers_fn = K.function([model.input, K.symbolic_learning_phase()], outputs)
    return layers_fn([input_data, learning_phase])

In [None]:
# List for layer names.

Layer_Names = []
for layer in Lenet.layers:
    Layer_Names.append(layer.name)

QLayer_Names = []
for layer in QLenet.layers:
    QLayer_Names.append(layer.name)

In [None]:
# Dictionary with layer name -> outputs
Layers_Outputs  = dict(zip(Layer_Names, get_all_outputs(Lenet,image[0])))
QLayers_Outputs = dict(zip(QLayer_Names, get_all_outputs(QLenet,image[0])))

## Comparing Outputs for QDense2 Layer

In [213]:
# Remember that in the Not Quantized model QLayers are just a bypass hence QDense = Dense
Layers_Outputs['QDense2']

array([[-3.7985363e+00,  2.9917758e+00, -1.9658886e+00, -1.3550131e+00,
         4.0264950e+00, -7.2691590e-01,  9.0934563e-01,  5.2751284e+00,
        -4.5163706e-01,  1.0568833e+00,  1.3927757e+00, -9.1078562e-01,
        -2.3354990e+00, -2.9907271e-01, -1.6706569e+00, -1.5447171e+00,
        -1.8712231e+00,  9.3230104e-01, -2.0313833e+00,  1.7453640e+00,
         3.2470200e+00,  3.6184146e+00,  2.6308894e+00,  1.2353708e+00,
         4.0721264e+00,  2.7568493e+00,  2.8428876e+00, -1.5958537e+00,
         3.3470535e+00, -8.5182175e-02, -2.0848680e+00,  2.1405609e+00,
        -5.0434155e+00, -1.4105461e+00, -2.3029561e+00, -4.7014804e+00,
         1.1015703e-01, -6.4549203e+00,  1.7049364e+00,  2.5652118e+00,
         5.0675601e-01,  2.1410992e+00,  2.1490867e+00, -2.6264310e+00,
        -3.0487020e+00,  1.6208662e+00,  3.3934338e+00,  4.6600504e+00,
         5.4187584e+00, -2.3735075e+00, -6.7448535e+00, -2.7114751e+00,
        -1.4448148e+00, -2.9179442e+00, -7.1346298e-02, -1.34670

In [214]:
QLayers_Outputs['QDense2']

array([[-3.625,  2.125, -0.75 , -1.625,  4.125, -0.375,  1.25 ,  4.   ,
         0.375,  1.25 ,  1.5  , -0.75 , -2.75 ,  0.5  , -1.   , -1.5  ,
        -0.875,  1.75 , -2.625,  0.75 ,  3.5  ,  3.   ,  2.5  ,  1.125,
         3.875,  3.375,  2.625, -1.5  ,  2.375,  0.875, -2.875,  1.25 ,
        -4.875, -1.25 , -1.75 , -5.   ,  0.125, -5.5  ,  1.25 ,  2.625,
         0.   ,  1.875,  3.   , -2.375, -2.875,  0.75 ,  3.   ,  4.625,
         5.5  , -3.   , -6.   , -2.5  , -1.5  , -2.125, -0.75 , -1.5  ,
         4.   , -0.25 ,  0.5  , -0.5  ,  0.   , -1.25 ,  5.75 ,  2.375,
        -2.625,  5.375,  0.875,  0.375, -1.625,  5.5  , -2.5  ,  0.125,
        -4.625,  0.5  , -2.625,  0.5  , -4.875, -2.25 , -7.375, -1.75 ,
         2.25 , -1.75 ,  0.375,  1.875]], dtype=float32)

## Testing Max and Min Values of Each Layer for the original model

In [271]:
N_layers = 25
iterator  = iter(test_dataset)
image     = next(iterator,'Stop')
Max_values = [0]*25
Min_values = [0]*25
while image != 'Stop':
    Model_outputs = get_all_outputs(Lenet,image[0])
    Max_iteration_values = np.array([np.max(itm) for itm in Model_outputs])
    Min_iteration_values = np.array([np.min(itm) for itm in Model_outputs])
    Max_values = np.maximum(Max_values, Max_iteration_values)
    Min_values = np.minimum(Min_values, Min_iteration_values)
    image = next(iterator,'Stop')

In [273]:
Max_values

array([ 1.        ,  1.        ,  6.51962948,  6.51962948,  0.99999565,
        0.99999565,  0.99997681, 11.36214638, 11.36214638,  1.        ,
        1.        ,  0.99999976,  0.99999976, 15.4092989 , 15.4092989 ,
        1.        ,  1.        , 10.59511375, 10.59511375,  1.        ,
        1.        , 15.80233192, 15.80233192,  0.99999881,  0.99999881])

In [274]:
Min_values

array([  0.        ,   0.        ,  -3.21593666,  -3.21593666,
        -0.9967863 ,  -0.9967863 ,  -0.99342388, -12.16223907,
       -12.16223907,  -1.        ,  -1.        ,  -0.99999869,
        -0.99999869, -15.66343212, -15.66343212,  -1.        ,
        -1.        , -11.51284122, -11.51284122,  -1.        ,
        -1.        , -11.09358406, -11.09358406,   0.        ,
         0.        ])

## Checking Max and Min values of Weights

In [9]:
for itm in Lenet.get_weights():
    print(np.max(itm))

1.6256596
0.21230586
0.9251389
0.18390743
0.62760293
0.24772961
0.54453754
0.20400645
0.72902125
0.05587189


In [10]:
for itm in Lenet.get_weights():
    print(np.min(itm))

-0.58532584
-0.2912165
-0.80219436
-0.19225334
-0.9469629
-0.23552617
-0.6235763
-0.22716506
-0.6461236
-0.07768464
