In [1]:
import tensorflow as tf
import numpy as np
import os

## Loading Test Data

In [2]:
# Fetch Fashion-MNIST and keep only the test split (the training split is discarded).
(_, _), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
# Append a trailing channel axis so each image goes from (28, 28) to (28, 28, 1).
x_test = tf.expand_dims(x_test, axis=-1)

## Creating Dataset Objects

In [3]:
def normalize_img(x_, y_):
    """Cast the image to float32 and divide it by 255; the label is returned untouched.

    Args:
        x_: image tensor.
        y_: label associated with the image.

    Returns:
        Tuple ``(scaled_image, y_)``.
    """
    image = tf.cast(x_, tf.float32)
    return image / 255., y_

def to_categorical(x_, y_, num_classes=10):
    """One-hot encode the label and pass the image through unchanged.

    Args:
        x_: image tensor, returned as-is.
        y_: integer class label(s) to encode.
        num_classes: length of the one-hot vector. Defaults to 10, the value
            that used to be hard-coded, so ``Dataset.map(to_categorical)``
            keeps working unchanged.

    Returns:
        Tuple ``(x_, one_hot_label)``.
    """
    return x_, tf.one_hot(y_, depth=num_classes)


# Test pipeline: one element per example, pixels scaled, labels one-hot encoded,
# and a batch size of 1 (a single example per evaluation step).
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(normalize_img)
    .map(to_categorical)
    .batch(1)
)

## Quantization Layer

In [4]:
def Quantization_layer(tensor, Quantization = True, signed = True, word_size = 12, frac_size = 6, saturate = False):
    """Round a tensor onto a fixed-point grid with ``frac_size`` fractional bits.

    Every value is rounded to the nearest multiple of ``2**-frac_size``.
    By default no overflow handling is applied, exactly as before; pass
    ``saturate=True`` to also clamp the result to the range representable
    with ``word_size`` bits.

    Args:
        tensor: values to quantize.
        Quantization: when False the input is returned untouched (bypass).
        signed: only used when ``saturate`` is True. Selects a two's-complement
            range (True) or an unsigned range starting at 0 (False).
        word_size: total number of bits of the fixed-point word, sign bit
            included. Only used when ``saturate`` is True.
        frac_size: number of fractional bits.
        saturate: clamp the quantized values to the representable range
            instead of assuming no overflow. Defaults to False.

    Returns:
        The quantized tensor, or ``tensor`` itself when ``Quantization`` is False.
    """
    if not Quantization:
        return tensor                                       # Simple bypass

    factor = 2.0**frac_size
    quantized = tf.round(tensor*factor) / factor

    if saturate:
        # Largest/smallest integer codes of the word, scaled back by the
        # fractional factor so the bounds are expressed in real-value units.
        if signed:
            max_qvalue = (2.0**(word_size - 1) - 1) / factor
            min_qvalue = -(2.0**(word_size - 1)) / factor
        else:
            max_qvalue = (2.0**word_size - 1) / factor
            min_qvalue = 0.0
        quantized = tf.clip_by_value(quantized, min_qvalue, max_qvalue)

    return quantized

## Creating Lenet Model

In [5]:
def build_model(input_layer, Quantization = True, signed = True, word_size = 12, frac_size = 6 ):
    """Wire the LeNet graph with a quantization Lambda after the input and after every stage.

    The graph is: input -> two (Conv2D, tanh, AveragePooling2D) blocks ->
    Flatten -> two (Dense, tanh) blocks -> Dense(10) -> softmax. A Lambda
    wrapping ``Quantization_layer`` follows the input, each Conv2D/Dense
    output and each activation output.

    Args:
        input_layer: Keras input tensor the graph is built on.
        Quantization, signed, word_size, frac_size: forwarded unchanged to
            every ``Quantization_layer`` Lambda.

    Returns:
        The output tensor of the final Lambda (named "QSoftmax").
    """
    Arguments = {'Quantization':Quantization, 'signed':signed, 'word_size':word_size, 'frac_size':frac_size}
    layers = tf.keras.layers
    tanh = tf.keras.activations.tanh

    def quantize(name, tensor):
        # One Lambda per quantization point; all of them share the same settings.
        return layers.Lambda(Quantization_layer, name=name, arguments=Arguments)(tensor)

    net = quantize("QInp", input_layer)

    # First convolutional block
    net = layers.Conv2D(6, kernel_size=5, strides=1, input_shape=(28,28,1), padding='same', name='Conv1')(net)
    net = quantize("QConv1", net)
    net = quantize("QAct1", tanh(net))
    net = layers.AveragePooling2D(name='AvgPool1')(net)

    # Second convolutional block
    net = layers.Conv2D(16, kernel_size=5, strides=1, padding='valid', name='Conv2')(net)
    net = quantize("QConv2", net)
    net = quantize("QAct2", tanh(net))
    net = layers.AveragePooling2D(name='AvgPool2')(net)
    net = layers.Flatten(name='Flatten')(net)

    # First dense block
    net = layers.Dense(units=120, name='Dense1')(net)
    net = quantize("QDense1", net)
    net = quantize("QAct3", tanh(net))

    # Second dense block
    net = layers.Dense(units=84, name='Dense2')(net)
    net = quantize("QDense2", net)
    net = quantize("QAct4", tanh(net))

    # Output block
    net = layers.Dense(units=10, name='Output')(net)
    net = quantize("QOut", net)
    return quantize("QSoftmax", tf.keras.activations.softmax(net))

## Loading Quantized model and Non Quantized model 

Con base en el resultado del último ítem del cuaderno "Lenet 5 Analysis", las entradas/salidas de las capas tienen valores que varían entre -15.6 y 15.8. Aunque los valores máximos y mínimos varían enormemente entre capas, por ahora realizaremos una cuantización igual para cada capa.

Considerando un bit para el signo, se necesitan al menos 4 bits para la parte entera; se medirá el accuracy de los modelos en función del número de bits destinados a la parte fraccionaria.

In [74]:

# Floating-point reference network: every quantization layer is bypassed.
input_layer  = tf.keras.Input((28, 28, 1))
output_layer = build_model(input_layer, Quantization = False)
Lenet        = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Activation format: 1 sign bit + Int_Bits integer bits + Frac_Bits fractional bits.
Int_Bits  = 4
Frac_Bits = 3

# Quantized network: same graph with the quantization layers enabled.
Qinput_layer  = tf.keras.Input((28, 28, 1))
Qoutput_layer = build_model(
    Qinput_layer,
    Quantization = True,
    word_size = (Frac_Bits+Int_Bits+1),
    frac_size = Frac_Bits,
)
QLenet = tf.keras.Model(inputs=Qinput_layer, outputs=Qoutput_layer)

## Loading Pretrained Weights

In [75]:
# Pretrained weights are read from <current working directory>/TrainedWeights/Weights
# (presumably a TensorFlow checkpoint prefix rather than a directory — confirm).
cwd = os.getcwd()
Wgt_dir = os.path.join(cwd, 'TrainedWeights', 'Weights')

# Both networks start from the same pretrained weights.
# For inspection, Lenet.summary() prints the architecture and Lenet.weights
# lists the loaded variables.
Lenet.load_weights(Wgt_dir)
QLenet.load_weights(Wgt_dir)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2290be607c8>

## Quantize Weights

Con base en el resultado del último ítem del cuaderno "Lenet 5 Analysis", los pesos tienen valores que varían entre -0.9 y 1.6; en este caso no hay grandes variaciones entre los valores de cada capa.

Considerando un bit para el signo, se necesitan al menos 2 bits para la parte entera; se medirá el accuracy de los modelos en función del número de bits destinados a la parte fraccionaria.

In [76]:
def Quantization(List, Quantization = True, signed = True, word_size = 12, frac_size = 6):
    """Round an array of weights onto a fixed-point grid with ``frac_size`` fractional bits.

    Each value is rounded to the nearest multiple of ``2**-frac_size``; no
    overflow handling is applied.

    Args:
        List: array-like of weight values.
        Quantization: when False the values are returned unquantized
            (as a NumPy array). The flag used to be silently ignored.
        signed: accepted for signature compatibility; not used.
        word_size: accepted for signature compatibility; not used.
        frac_size: number of fractional bits.

    Returns:
        numpy.ndarray with the same shape as ``List``, suitable for
        ``layer.set_weights``.
    """
    values = np.array(List)
    if not Quantization:
        return values                                       # Simple bypass

    factor = 2.0**frac_size
    # np.round resolves ties to the nearest even integer, the same rule as tf.round.
    return np.round(values*factor) / factor                 # Quantization, assuming no overflow

# Weight format: 1 sign bit + Int_Bits integer bits + Frac_Bits fractional bits.
# Note that these names are also assigned by the cell that builds QLenet.
Frac_Bits = 3
Int_Bits  = 2

for layer in QLenet.layers:
    weights = layer.get_weights()
    if weights:                     # Layer with weights
        # Quantize every array the layer holds, so layers that do not carry
        # exactly one kernel and one bias are handled as well.
        Qweights = [
            Quantization(w, word_size = (Frac_Bits+Int_Bits+1), frac_size = Frac_Bits)
            for w in weights
        ]
        layer.set_weights(Qweights)

## Check the general Accuracy

In [77]:
# Compile settings shared by both networks.
# The cells shown in this notebook only call evaluate(), so the optimizer is
# passed to compile() but never stepped here.
lr = 1e-3
loss = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Reported alongside the loss by evaluate().
metrics = ['accuracy']

# Compile the reference model first, then the quantized one.
for net in (Lenet, QLenet):
    net.compile(optimizer=optimizer, loss=loss, metrics=metrics)

## Original Model

In [67]:
# Evaluate the unquantized reference model on the test set; returns [loss, accuracy].
Lenet.evaluate(test_dataset)



[0.3196016080860579, 0.8883]

## 1-bit Fractional Precision for Weights && 1-bit Fractional Precision for Activations

In [68]:
# Evaluate the quantized model on the test set; returns [loss, accuracy].
# NOTE(review): this call is identical in every QLenet evaluation cell. The result
# below presumably came from first re-running the model-building and
# weight-quantization cells with Frac_Bits = 1 — confirm before relying on it.
QLenet.evaluate(test_dataset)



[7.602550701546669, 0.4099]

## 2-bit Fractional Precision for Weights && 2-bit Fractional Precision for Activations

In [73]:
# Evaluate the quantized model on the test set; returns [loss, accuracy].
# NOTE(review): this call is identical in every QLenet evaluation cell. The result
# below presumably came from first re-running the model-building and
# weight-quantization cells with Frac_Bits = 2 — confirm before relying on it.
QLenet.evaluate(test_dataset)



[1.1184957627177239, 0.8493]

## 3-bit Fractional Precision for Weights && 3-bit Fractional Precision for Activations

In [78]:
# Evaluate the quantized model on the test set; returns [loss, accuracy].
# Frac_Bits = 3 is the value set in the model-building and weight-quantization
# cells as written, so this is the configuration a top-to-bottom run evaluates.
QLenet.evaluate(test_dataset)



[0.6948502577155831, 0.8729]

Notice how even with low precision (2 fractional bits) the network accuracy is almost unaffected: 0.8493 versus 0.8883 for the original model.