In [None]:
import tensorflow as tf
import numpy as np
import os

## Loading CIFAR10 Data

In [None]:
# Only the test split is used in this notebook; the training split is discarded.
(_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Drop the unnecessary singleton dimension(s) of the target array.
y_test = y_test.squeeze()

## Creating Dataset Objects

In [None]:
# Image normalization
def normalize_img(x_, y_):
    """Cast the image to float32 and divide it by 255; the label is passed through.

    NOTE(review): the result lies in [0, 1] only if the input pixels are
    8-bit values -- confirm against the dataset dtype.
    """
    scaled = tf.cast(x_, tf.float32) / 255.
    return scaled, y_

# One-hot encoding of the label
def to_categorical(x_, y_):
    """Return the image unchanged together with the label one-hot encoded over 10 classes."""
    encoded_label = tf.one_hot(y_, depth=10)
    return x_, encoded_label

# Resizing
def process_images(image, label):
    """Resize the image to 224x224 (the models' input size); the label is passed through."""
    target_size = (224,224)
    return tf.image.resize(image, target_size), label

# Test pipeline: normalize -> one-hot labels -> resize, served one image per batch.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(normalize_img)
    .map(to_categorical)
    .map(process_images)
    .batch(1)
)

## Quantization Layer

In [None]:
def Quantization_layer(tensor, Quantization = True,signed = True, word_size = 12, frac_size = 6, saturate = False):
    """Simulate fixed-point quantization of ``tensor``.

    Values are scaled by ``2**frac_size``, rounded with ``tf.round`` and scaled
    back, so the result is a multiple of ``2**-frac_size``.

    Args:
        tensor: Values to quantize.
        Quantization: When False the input is returned untouched (bypass).
        signed: Whether the simulated word is signed. Only used when
            ``saturate`` is True.
        word_size: Total number of bits of the simulated word (integer). Only
            used when ``saturate`` is True.
        frac_size: Number of fractional bits.
        saturate: When True, clip the rounded values to the range representable
            with ``word_size`` bits. The default (False) keeps the original
            behaviour, which assumes that no overflow occurs.

    Returns:
        The quantized tensor, or ``tensor`` itself when ``Quantization`` is False.
    """
    if not Quantization:
        return tensor                                       # Simple bypass

    factor = 2.0**frac_size
    quantized = tf.round(tensor*factor) / factor

    if saturate:
        # Representable range of the fixed-point word, expressed in real units.
        if signed:
            Max_Qvalue = ((1 << (word_size-1)) - 1)/factor
            Min_Qvalue = -(1 << (word_size-1))/factor
        else:
            Max_Qvalue = ((1 << (word_size)) - 1)/factor
            Min_Qvalue = 0.0
        quantized = tf.clip_by_value(quantized, Min_Qvalue, Max_Qvalue)

    return quantized

## Creating VGG Model

In [None]:
from tensorflow.keras.layers import Conv2D, Dense, MaxPool2D, Flatten, Lambda


def build_model(input_layer, Quantization = True, signed = True, word_size = 12, frac_size = 6 ):
    """Build the VGG16-style graph with a quantization point after every operation.

    The network is 13 3x3 "same" convolutions arranged in five blocks (each
    block closed by a 2x2 max-pool), a flatten, two 4096-unit dense layers and
    a 10-unit dense layer followed by softmax. The input, every
    convolution/dense output, every ReLU output and the softmax output are
    each passed through their own ``Lambda(Quantization_layer)``.

    Layers are created in exactly the order input-quantizer, then per stage
    (conv|dense) -> quantizer -> relu -> quantizer, so auto-generated layer
    names and the weight ordering stay stable.

    Args:
        input_layer: Keras input tensor the graph is attached to.
        Quantization, signed, word_size, frac_size: Forwarded unchanged to
            ``Quantization_layer`` at every quantization point.

    Returns:
        The quantized softmax output tensor.
    """
    Arguments = {'Quantization':Quantization, 'signed':signed, 'word_size':word_size, 'frac_size':frac_size}

    def quantize(tensor):
        # A fresh Lambda per call: every quantization point is its own layer.
        return Lambda(Quantization_layer, arguments = Arguments )(tensor)

    def quantized_relu(tensor):
        # quantize -> ReLU -> quantize, applied to a conv/dense output.
        return quantize(tf.keras.activations.relu(quantize(tensor)))

    # Filters of each convolution, grouped by block; a max-pool follows each block.
    conv_blocks = ((64, 64), (128, 128), (256, 256, 256), (512, 512, 512), (512, 512, 512))

    x = quantize(input_layer)

    # Convolutional part
    for block_filters in conv_blocks:
        for filters in block_filters:
            x = Conv2D(filters=filters, kernel_size=(3,3), padding="same")(x)
            x = quantized_relu(x)
        x = MaxPool2D(pool_size=(2,2),strides=(2,2))(x)

    x = Flatten()(x)

    # Fully connected part
    for units in (4096, 4096):
        x = Dense(units)(x)
        x = quantized_relu(x)

    # Classifier: 10 outputs, quantized before and after the softmax.
    logits = quantize(Dense(10)(x))
    return quantize(tf.keras.activations.softmax(logits))

## Building the Quantized and Non-Quantized Models

In [None]:
# Reference model: same graph, but every quantization point is a bypass.
# NOTE(review): keep the reference model built before the quantized one --
# later cells look layers up by names such as 'lambda_5' and 'lambda_38',
# which presumably depend on the order in which the layers are created.
input_layer   = tf.keras.Input((224,224,3))
output_layer  = build_model(input_layer, Quantization = False)

# Quantized model: 14-bit words with 8 fractional bits.
Qinput_layer  = tf.keras.Input((224,224,3))
Qoutput_layer = build_model(Qinput_layer, Quantization = True, word_size = 14, frac_size = 8)

VGG16  = tf.keras.Model(inputs=input_layer, outputs=output_layer)
QVGG16 = tf.keras.Model(inputs=Qinput_layer, outputs=Qoutput_layer)

## Loading Pretrained Weights

In [None]:
# Loading weights: both models read the same file, <cwd>/TrainedWeights/Weights
cwd = os.getcwd()
Wgt_dir = os.path.join(cwd, 'TrainedWeights', 'Weights')
VGG16.load_weights(Wgt_dir)
QVGG16.load_weights(Wgt_dir)

# Visualize the VGG16 architecture
#VGG16.summary()

# Visualize the loaded weights
#VGG16.weights

## Quantize weights

In [None]:
def Quantization(List, Quantization = True, signed = True, word_size = 12, frac_size = 6):
    """Quantize an array of weights to a multiple of ``2**-frac_size``.

    Args:
        List: Array-like of values to quantize.
        Quantization: When False the input is returned untouched (bypass),
            mirroring ``Quantization_layer``.
        signed: Accepted for signature parity with ``Quantization_layer``; unused.
        word_size: Accepted for signature parity with ``Quantization_layer``;
            unused because no overflow handling is performed.
        frac_size: Number of fractional bits.

    Returns:
        The values scaled by ``2**frac_size``, rounded with ``tf.round`` and
        scaled back; or ``List`` itself when ``Quantization`` is False.
    """
    if not Quantization:
        return List                                             # Simple bypass
    factor = 2.0**frac_size
    return tf.round(np.array(List)*factor) / factor             # Quantization, assuming no overflow

# Replace the parameters of every weighted layer of the quantized model
# by their quantized version (same format as the activations).
for layer in QVGG16.layers:
    weights = layer.get_weights()
    if not weights:                 # Layer without weights: nothing to do
        continue
    # Each entry of the layer's weight list (kernel, bias, ...) is quantized on its own.
    Qweights = [Quantization(itm, word_size = 14, frac_size = 8) for itm in weights]
    layer.set_weights(Qweights)

## Checking the model predictions

In [None]:
import time
import matplotlib.pyplot as plt

In [None]:
# Iterator over test Dataset
iterator  = iter(test_dataset)

In [None]:
# Get the next (image batch, label batch) pair from the iterator.
image     = next(iterator)
# For plotting: first element of the batch, index 0 of the last axis only.
image_plt = image[0][0,...,0]

In [None]:
# Plotting Test image
plt.imshow(image_plt)

In [14]:
# Target class: index of the largest entry of the one-hot label
tf.print("Target:",np.argmax(image[1]))
# Class predicted by the non-quantized model
print("Prediction:",np.argmax(VGG16.predict(image[0])))
# Class predicted by the quantized model
print("QPrediction:",np.argmax(QVGG16.predict(image[0])))

Target: 3
Prediction: 3
QPrediction: 3


## Comparison of Outputs

In [16]:
VGG16.predict(image[0])

array([[1.9180329e-04, 2.3970929e-04, 3.2078254e-03, 6.8547988e-01,
        2.5826562e-03, 2.8004652e-01, 1.9593343e-02, 5.7677077e-03,
        2.2453316e-03, 6.4517255e-04]], dtype=float32)

In [17]:
QVGG16.predict(image[0])

array([[0.        , 0.        , 0.00390625, 0.68359375, 0.00390625,
        0.2890625 , 0.015625  , 0.00390625, 0.        , 0.        ]],
      dtype=float32)

#### Classes

0. airplane
1. automobile
2. bird
3. cat
4. deer
5. dog
6. frog
7. horse
8. ship
9. truck

## Check the general Accuracy

In [None]:
# Evaluation setup
# ----------------

# Metric reported next to the loss
metrics = ['accuracy']

# Loss
loss = tf.keras.losses.CategoricalCrossentropy()

# Optimizer and its learning rate
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# ----------------

# Compile both models with identical settings
VGG16.compile(loss=loss, optimizer=optimizer, metrics=metrics)
QVGG16.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [19]:
VGG16.evaluate(test_dataset)



[0.8012385272836876, 0.7349]

In [20]:
QVGG16.evaluate(test_dataset)



[0.8922459922850597, 0.7359]

## Checking The Output of Each Layer

In [None]:
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.backend import eager_learning_phase_scope

# Collect the output of every layer of a model for a given input.
def get_all_outputs(model, input_data, learning_phase=False):
    """Run ``model`` on ``input_data`` and return one output per layer.

    The returned list follows the order of ``model.layers`` and covers every
    entry of it (nothing is filtered out).

    Args:
        model: Keras model to inspect.
        input_data: Value fed to ``model.input``.
        learning_phase: Value fed to the backend's symbolic learning phase.
    """
    fetches = [lyr.output for lyr in model.layers]
    feeds = [model.input, K.symbolic_learning_phase()]
    run_layers = K.function(feeds, fetches)
    return run_layers([input_data, learning_phase])

In [None]:
# Layer names of both models, in the order of model.layers.

Layer_Names = [layer.name for layer in VGG16.layers]

QLayer_Names = [layer.name for layer in QVGG16.layers]

In [None]:
# Dictionaries mapping layer name -> that layer's output for the current image
Layers_Outputs  = {name: output for name, output in zip(Layer_Names, get_all_outputs(VGG16,image[0]))}
QLayers_Outputs = {name: output for name, output in zip(QLayer_Names, get_all_outputs(QVGG16,image[0]))}

## Comparing Outputs for Convolution #3

In [None]:
# 'lambda_5' is expected to be the sixth Lambda layer of VGG16, i.e. the
# quantization point right after the third convolution (a bypass in this model).
# NOTE(review): the name is auto-generated; it changes if models are rebuilt
# in the same session -- confirm against Layer_Names.
Layers_Outputs['lambda_5']

In [None]:
# 'lambda_38' is expected to be the quantized-model counterpart of VGG16's
# 'lambda_5' (VGG16 contributes 33 Lambda layers, and 33 + 5 = 38).
# NOTE(review): this assumes the automatic numbering continues across both
# models and that they were built once, VGG16 first -- confirm against QLayer_Names.
QLayers_Outputs['lambda_38']

## Testing Max and Min Values of Each Layer for the original model

In [31]:
# Per-layer activation range of the non-quantized model over the whole test set.
# One slot per entry of VGG16.layers, which is what get_all_outputs returns.
N_layers   = len(VGG16.layers)
# Start from -inf/+inf so that the data alone defines the range; starting
# from 0 would force every maximum to be >= 0 and every minimum to be <= 0.
Max_values = np.full(N_layers, -np.inf)
Min_values = np.full(N_layers, np.inf)
iterator   = iter(test_dataset)
for image in iterator:
    Model_outputs = get_all_outputs(VGG16,image[0])
    Max_iteration_values = np.array([np.max(itm) for itm in Model_outputs])
    Min_iteration_values = np.array([np.min(itm) for itm in Model_outputs])
    Max_values = np.maximum(Max_values, Max_iteration_values)
    Min_values = np.minimum(Min_values, Min_iteration_values)

In [32]:
Max_values

array([ 1.        ,  1.        ,  0.55289382,  0.55289382,  0.55289382,
        0.55289382,  0.56202006,  0.56202006,  0.56202006,  0.56202006,
        0.56202006,  0.40619522,  0.40619522,  0.40619522,  0.40619522,
        0.57603204,  0.57603204,  0.57603204,  0.57603204,  0.57603204,
        0.672952  ,  0.672952  ,  0.672952  ,  0.672952  ,  1.06780303,
        1.06780303,  1.06780303,  1.06780303,  1.74278188,  1.74278188,
        1.74278188,  1.74278188,  1.74278188,  1.6580162 ,  1.6580162 ,
        1.6580162 ,  1.6580162 ,  1.94520521,  1.94520521,  1.94520521,
        1.94520521,  2.26748776,  2.26748776,  2.26748776,  2.26748776,
        2.26748776,  2.46216726,  2.46216726,  2.46216726,  2.46216726,
        3.3225956 ,  3.3225956 ,  3.3225956 ,  3.3225956 ,  3.02389812,
        3.02389812,  3.02389812,  3.02389812,  3.02389812,  3.02389812,
        4.39980793,  4.39980793,  4.39980793,  4.39980793,  4.64638805,
        4.64638805,  4.64638805,  4.64638805, 44.03063965, 44.03

In [33]:
Min_values

array([  0.        ,   0.        ,  -0.67023152,  -0.67023152,
         0.        ,   0.        ,  -0.55003768,  -0.55003768,
         0.        ,   0.        ,   0.        ,  -0.65099853,
        -0.65099853,   0.        ,   0.        ,  -0.53201503,
        -0.53201503,   0.        ,   0.        ,   0.        ,
        -0.66176283,  -0.66176283,   0.        ,   0.        ,
        -0.94754082,  -0.94754082,   0.        ,   0.        ,
        -1.39863992,  -1.39863992,   0.        ,   0.        ,
         0.        ,  -1.55851161,  -1.55851161,   0.        ,
         0.        ,  -2.03468847,  -2.03468847,   0.        ,
         0.        ,  -3.00645447,  -3.00645447,   0.        ,
         0.        ,   0.        ,  -3.01076388,  -3.01076388,
         0.        ,   0.        ,  -3.67845058,  -3.67845058,
         0.        ,   0.        ,  -5.63244438,  -5.63244438,
         0.        ,   0.        ,   0.        ,   0.        ,
        -5.26455212,  -5.26455212,   0.        ,   0.  

## Checking max and min values of weights

In [8]:
for itm in VGG16.get_weights():
    print(np.max(itm))

0.105967745
0.014988257
0.09595051
0.011936342
0.10281713
0.015564269
0.08596791
0.019136881
0.09624452
0.012335057
0.07714346
0.012018733
0.067087404
0.011228289
0.07069143
0.016360529
0.08120797
0.021275712
0.07580003
0.018184675
0.0711499
0.036662944
0.07354726
0.033380628
0.06902299
0.032600958
0.064379156
0.026954189
0.071879946
0.03205061
0.053636953
0.011063661


In [9]:
for itm in VGG16.get_weights():
    print(np.min(itm))

-0.115536876
-0.016275147
-0.10245023
-0.016122118
-0.08389944
-0.017848462
-0.087104104
-0.017060434
-0.08118384
-0.014572672
-0.0745357
-0.011928877
-0.074435614
-0.012825207
-0.07202102
-0.014688623
-0.06944868
-0.013372264
-0.083173715
-0.03949675
-0.07473356
-0.02615708
-0.07346563
-0.036137845
-0.07589335
-0.040974997
-0.07180178
-0.015379306
-0.06817566
-0.014832148
-0.07294732
-0.011839713
