In [None]:
import tensorflow as tf
import numpy as np
import os

## Loading CIFAR10 Data

In [None]:
(_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Reducing Target innecessary dimension
y_test  = np.squeeze(y_test)

## Creating Dataset Objects

In [None]:
# Normalize Images
def normalize_img(x_, y_):
    return tf.cast(x_, tf.float32) / 255., y_

# 1-hot encoding
def to_categorical(x_, y_):
    return x_, tf.one_hot(y_, depth=10)

# Resizing 
def process_images(image, label):
    # Resize images from 32x32 to 277x277
    image = tf.image.resize(image, (227,227))
    return image, label

test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.map(normalize_img)
test_dataset = test_dataset.map(to_categorical)
test_dataset = test_dataset.map(process_images)
test_dataset = test_dataset.batch(1)

## Quantization Layer

In [None]:
def Quantization_layer(tensor, Quantization = True,signed = True, word_size = 12, frac_size = 6):
    
    factor = 2.0**frac_size
    
    # Quantized max and min values, in case of the need to implement overflow cases.
    #if signed:
    #    Max_Qvalue = ((1 << (word_size-1)) - 1)/factor
    #    Min_Qvalue = -Max_Qvalue - 1
    #else:
    #    Max_Qvalue = ((1 << (word_size)) - 1)/factor
    #    Min_Qvalue = 0
    
    if Quantization:
        return tf.round(tensor*factor) / factor             #Quantization, assuming no overflow
    else:
        return tensor                                       #Simple Bypass

## Creating AlexNet Model

In [None]:
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, MaxPool2D, Flatten, Dropout, Lambda


def build_model(input_layer, Quantization = True, signed = True, word_size = 12, frac_size = 6 ):
    
    Arguments = {'Quantization':Quantization, 'signed':signed, 'word_size':word_size, 'frac_size':frac_size}
    QInp      = Lambda(Quantization_layer, arguments = Arguments )(input_layer)
    
    #Conv Block
    Conv1   = Conv2D(filters=96, kernel_size=(11,11), strides=(4,4))(QInp)
    QConv1  = Lambda(Quantization_layer, arguments = Arguments )(Conv1)
    Relu1   = tf.keras.activations.relu(QConv1)
    QRelu1  = Lambda(Quantization_layer, arguments = Arguments )(Relu1)
    BN1     = BatchNormalization()(QRelu1)
    QBN1    = Lambda(Quantization_layer, arguments = Arguments )(BN1)
    MP1     = MaxPool2D(pool_size=(3,3), strides=(2,2))(QBN1)
    
    Conv2   = Conv2D(filters=256, kernel_size=(5,5), strides=(1,1),padding="same")(MP1)
    QConv2  = Lambda(Quantization_layer, arguments = Arguments )(Conv2)
    Relu2   = tf.keras.activations.relu(QConv2)
    QRelu2  = Lambda(Quantization_layer, arguments = Arguments )(Relu2)
    BN2     = BatchNormalization()(QRelu2)
    QBN2    = Lambda(Quantization_layer, arguments = Arguments )(BN2)
    MP2     = MaxPool2D(pool_size=(3,3), strides=(2,2))(QBN2)
    
    Conv3   = Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding="same")(MP2)
    QConv3  = Lambda(Quantization_layer, arguments = Arguments )(Conv3)
    Relu3   = tf.keras.activations.relu(QConv3)
    QRelu3  = Lambda(Quantization_layer, arguments = Arguments )(Relu3)
    BN3     = BatchNormalization()(QRelu3)
    QBN3    = Lambda(Quantization_layer, arguments = Arguments )(BN3)
    
    Conv4   = Conv2D(filters=384, kernel_size=(1,1), strides=(1,1), padding="same")(QBN3)
    QConv4  = Lambda(Quantization_layer, arguments = Arguments )(Conv4)
    Relu4   = tf.keras.activations.relu(QConv4)
    QRelu4  = Lambda(Quantization_layer, arguments = Arguments )(Relu4)
    BN4     = BatchNormalization()(QRelu4)
    QBN4    = Lambda(Quantization_layer, arguments = Arguments )(BN4)
    
    Conv5   = Conv2D(filters=256, kernel_size=(1,1), strides=(1,1), padding="same")(QBN4)
    QConv5  = Lambda(Quantization_layer, arguments = Arguments )(Conv5)
    Relu5   = tf.keras.activations.relu(QConv5)
    QRelu5  = Lambda(Quantization_layer, arguments = Arguments )(Relu5)
    BN5     = BatchNormalization()(QRelu5)
    QBN5    = Lambda(Quantization_layer, arguments = Arguments )(BN5)
    MP5     = MaxPool2D(pool_size=(3,3), strides=(2,2))(QBN5)
    
    Flat    = Flatten()(MP5)
    
    Dense6  = Dense(4096)(Flat)
    QDense6 = Lambda(Quantization_layer, arguments = Arguments )(Dense6)
    Relu6   = tf.keras.activations.relu(QDense6)
    QRelu6  = Lambda(Quantization_layer, arguments = Arguments )(Relu6)
    Drop6   = Dropout(0.5)(QRelu6)
    
    Dense7  = Dense(4096)(Drop6)
    QDense7 = Lambda(Quantization_layer, arguments = Arguments )(Dense7)
    Relu7   = tf.keras.activations.relu(QDense7)
    QRelu7  = Lambda(Quantization_layer, arguments = Arguments )(Relu7)
    Drop7   = Dropout(0.5)(QRelu7)
    
    Dense8  = Dense(10)(Drop7)
    QDense8 = Lambda(Quantization_layer, arguments = Arguments )(Dense8)
    SM8     = tf.keras.activations.softmax(QDense8)
    QSM8    = Lambda(Quantization_layer, arguments = Arguments )(SM8)
    
    return QSM8

## Loading Quantized model and Non Quantized model 

In [None]:

input_layer   = tf.keras.Input((227,227,3))
output_layer  = build_model(input_layer, Quantization = False)

#For this example we using 8 bits of precision.
Qinput_layer  = tf.keras.Input((227,227,3))
Qoutput_layer = build_model(Qinput_layer, Quantization = True, word_size = 14, frac_size = 8)

AlexNet  = tf.keras.Model(inputs=input_layer, outputs=output_layer)
QAlexNet = tf.keras.Model(inputs=Qinput_layer, outputs=Qoutput_layer)

## Loading Pretrained Weights

In [None]:
# Loading Wieghts
cwd = os.getcwd()
Wgt_dir = os.path.join(cwd,'TrainedWeights')
Wgt_dir = os.path.join(Wgt_dir,'Weights')
AlexNet.load_weights(Wgt_dir)
QAlexNet.load_weights(Wgt_dir)

# Visualize AlexNet Architecture
#AlexNet.summary()

# Visualize initialized weights
#AlexNet.weights

## Quantize weights

In [None]:
def Quantization(List, Quantization = True, signed = True, word_size = 12, frac_size = 6):
    factor = 2.0**frac_size
    return tf.round(np.array(List)*factor) / factor             #Quantization, assuming no overflow

for layer in QAlexNet.layers:
    weights = layer.get_weights()
    if weights:                     # Layer with weights
        # Quantization of Weights and Bias 
        Qweights    = [Quantization(itm, word_size = 14, frac_size = 8) for itm in weights]
        layer.set_weights(Qweights)

## Checking the model predictions

In [None]:
import time
import matplotlib.pyplot as plt

In [None]:
# Iterator over test Dataset
iterator  = iter(test_dataset)

In [None]:
# Getting new image from iterator
image     = next(iterator)
image_plt = image[0][0,...,0]

In [None]:
# Plotting Test image
plt.imshow(image_plt)

In [85]:
# Target
tf.print("Target:",np.argmax(image[1]))
# Predicted Output
print("Prediction:",np.argmax(AlexNet.predict(image[0])))
# Quantized Predicted Output
print("QPrediction:",np.argmax(QAlexNet.predict(image[0])))

Target: 3
Prediction: 3
QPrediction: 3


## Comparation of Output

In [86]:
AlexNet.predict(image[0])

array([[2.46926502e-05, 1.04233695e-05, 6.94376213e-05, 9.56468999e-01,
        6.18058839e-05, 4.23380062e-02, 9.96380695e-04, 2.18989862e-06,
        2.01556504e-05, 7.85208795e-06]], dtype=float32)

In [87]:
QAlexNet.predict(image[0])

array([[0.        , 0.        , 0.        , 0.9609375 , 0.        ,
        0.03515625, 0.        , 0.        , 0.        , 0.        ]],
      dtype=float32)

#### Classes

0. airplane
1. automobile
2. bird
3. cat
4. deer
5. dog
6. frog
7. horse
8. ship
9. truck

## Check the general Accuracy

In [None]:
# Optimization params
# -------------------

# Loss
loss = tf.keras.losses.CategoricalCrossentropy()

# learning rate
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# -------------------

# Validation metrics
# ------------------

metrics = ['accuracy']
# ------------------

# Compile Model
AlexNet.compile(optimizer=optimizer, loss=loss, metrics=metrics)
QAlexNet.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [89]:
AlexNet.evaluate(test_dataset)



[0.6315659527876948, 0.7898]

In [90]:
QAlexNet.evaluate(test_dataset)



[0.6723387817996089, 0.7935]

## Checking The Output of Each Layer

In [None]:
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.backend import eager_learning_phase_scope

# Function to get outputs from each layer.
def get_all_outputs(model, input_data, learning_phase=False):
    outputs = [layer.output for layer in model.layers] # exclude Input
    layers_fn = K.function([model.input, K.symbolic_learning_phase()], outputs)
    return layers_fn([input_data, learning_phase])

In [None]:
# List for layer names.

Layer_Names = []
for layer in AlexNet.layers:
    Layer_Names.append(layer.name)

QLayer_Names = []
for layer in QAlexNet.layers:
    QLayer_Names.append(layer.name)

In [None]:
# Dictionary with layer name -> outputs
Layers_Outputs  = dict(zip(Layer_Names, get_all_outputs(AlexNet,image[0])))
QLayers_Outputs = dict(zip(QLayer_Names, get_all_outputs(QAlexNet,image[0])))

## Comparing Outputs for The convolution #2

In [115]:
# Check AlexNet.summary() to look for specific layer.

Layers_Outputs['lambda_180']

array([[[[-2.606743  , -1.4824855 , -1.1377517 , ..., -0.8539708 ,
           0.3383507 ,  0.21045436],
         [-3.0393326 , -2.2158618 , -0.99061877, ..., -1.1680138 ,
           0.25034964,  0.28698456],
         [-2.8706253 , -2.8723023 , -1.119233  , ..., -1.1876609 ,
           0.8022059 ,  0.10838492],
         ...,
         [-2.458041  , -1.8997049 , -1.3837631 , ..., -0.46334904,
           1.4758767 ,  0.21984023],
         [-2.0522811 , -1.6578349 , -1.0190948 , ..., -0.5996378 ,
           0.8184831 ,  0.0976574 ],
         [-1.16906   , -1.1299301 , -0.73243904, ..., -0.5728617 ,
           0.01582111, -0.16498914]],

        [[-2.2159624 , -1.6978645 , -1.233336  , ..., -0.19716816,
           0.2823603 ,  0.31327766],
         [-2.9536593 , -2.7829134 , -1.3210441 , ..., -0.5019381 ,
           0.2555975 ,  0.4370066 ],
         [-2.9007897 , -3.369269  , -1.5034692 , ..., -0.34781575,
           1.2196257 ,  0.24811645],
         ...,
         [-2.450498  , -2.336177  

In [117]:
# Check AlexNet.summary() to look for specific layer.

QLayers_Outputs['lambda_202']

array([[[[-2.6601562 , -1.5195312 , -1.171875  , ..., -0.890625  ,
           0.33203125,  0.19921875],
         [-3.109375  , -2.28125   , -1.03125   , ..., -1.2265625 ,
           0.265625  ,  0.30078125],
         [-2.9296875 , -2.9414062 , -1.1679688 , ..., -1.2539062 ,
           0.828125  ,  0.1171875 ],
         ...,
         [-2.5234375 , -1.9609375 , -1.4179688 , ..., -0.53515625,
           1.4804688 ,  0.234375  ],
         [-2.0898438 , -1.6992188 , -1.0273438 , ..., -0.6484375 ,
           0.8203125 ,  0.10546875],
         [-1.1757812 , -1.1484375 , -0.73046875, ..., -0.6015625 ,
           0.01171875, -0.16015625]],

        [[-2.2773438 , -1.7460938 , -1.28125   , ..., -0.2109375 ,
           0.28515625,  0.28125   ],
         [-3.0390625 , -2.8476562 , -1.3632812 , ..., -0.54296875,
           0.2890625 ,  0.4375    ],
         [-2.96875   , -3.4492188 , -1.5625    , ..., -0.40234375,
           1.265625  ,  0.23828125],
         ...,
         [-2.5234375 , -2.3710938 

## Testing Max and Min Values of Each Layer for the original model

In [131]:
N_layers = 50
iterator  = iter(test_dataset)
image     = next(iterator,'Stop')
Max_values = [0]*50
Min_values = [0]*50
while image != 'Stop':
    Model_outputs = get_all_outputs(AlexNet,image[0])
    Max_iteration_values = np.array([np.max(itm) for itm in Model_outputs])
    Min_iteration_values = np.array([np.min(itm) for itm in Model_outputs])
    Max_values = np.maximum(Max_values, Max_iteration_values)
    Min_values = np.minimum(Min_values, Min_iteration_values)
    image = next(iterator,'Stop')

In [132]:
Max_values

array([ 1.        ,  1.        ,  2.71788788,  2.71788788,  2.71788788,
        2.71788788, 16.73804474, 16.73804474, 16.73804474, 20.78150177,
       20.78150177, 20.78150177, 20.78150177, 37.55444336, 37.55444336,
       37.55444336, 53.57574844, 53.57574844, 53.57574844, 53.57574844,
       27.10181808, 27.10181808, 15.17789745, 15.17789745, 15.17789745,
       15.17789745, 16.10264778, 16.10264778, 15.52670383, 15.52670383,
       15.52670383, 15.52670383, 22.74578285, 22.74578285, 22.74578285,
       22.74578285, 46.52299881, 46.52299881, 46.52299881, 46.52299881,
       46.52299881, 13.92596149, 13.92596149, 13.92596149, 13.92596149,
       13.92596149, 47.77276611, 47.77276611,  1.        ,  1.        ])

In [133]:
Min_values

array([  0.        ,   0.        ,  -2.65676451,  -2.65676451,
         0.        ,   0.        ,  -1.95773494,  -1.95773494,
        -1.95731556, -29.30832863, -29.30832863,   0.        ,
         0.        ,  -1.20609355,  -1.20609355,  -1.20609355,
       -61.70952606, -61.70952606,   0.        ,   0.        ,
        -1.35015488,  -1.35015488, -17.11544037, -17.11544037,
         0.        ,   0.        ,  -0.91049999,  -0.91049999,
       -16.34580803, -16.34580803,   0.        ,   0.        ,
        -0.84334826,  -0.84334826,  -0.84334826,  -0.84334826,
       -79.99137878, -79.99137878,   0.        ,   0.        ,
         0.        , -16.63898468, -16.63898468,   0.        ,
         0.        ,   0.        , -21.47931671, -21.47931671,
         0.        ,   0.        ])

## Checking max and min values of weights

In [48]:
for itm in AlexNet.get_weights():
    print(np.max(itm))

0.060662657
0.054879453
1.1514293
0.08819208
1.2794288
0.3740722
0.09191583
0.035192613
1.0881922
0.027058428
1.9218924
3.1125636
0.0826775
0.033767797
1.0528617
0.05498081
7.267303
28.772236
0.13753861
0.02818342
1.0350039
0.26688427
0.8254347
1.505141
0.13657723
0.006295864
0.9885288
-0.06251466
0.7107444
1.0447234
0.07450771
0.005876748
0.07925708
0.01865592
0.051391937
0.024923662


In [42]:
for itm in AlexNet.get_weights():
    print(np.min(itm))

-0.05607786
-0.023961341
0.7970406
-0.07524538
1.163794e-36
1.1647963e-36
-0.10104939
-0.036688264
0.9350337
-0.047507
0.056569595
0.047031503
-0.086561464
-0.027074352
0.9637472
-0.03758461
0.18171231
0.45579082
-0.1344526
-0.05151097
0.9608286
-0.20731844
0.3464395
0.34071973
-0.14339194
-0.13129726
0.84826046
-0.16713257
0.19749111
0.21392043
-0.085306115
-0.015681174
-0.07879663
-0.0409071
-0.062905505
-0.022243463
