In [1]:
import tensorflow as tf
import numpy as np
import os

## Loading CIFAR10 Data

In [2]:
# Only the test split (second tuple returned by load_data) is used in this notebook
x_test, y_test = tf.keras.datasets.cifar10.load_data()[1]
# Drop the unnecessary length-1 dimension of the targets
y_test = y_test.squeeze()

## Creating Dataset Objects

In [3]:
# Normalize Images
def normalize_img(x_, y_):
    """Cast the image to float32 and divide it by 255; the label is passed through.

    Args:
        x_: image tensor (presumably uint8 pixels in 0..255 -- confirm against the dataset).
        y_: label, returned unchanged.

    Returns:
        Tuple ``(scaled_image, y_)``.
    """
    scaled = tf.cast(x_, tf.float32) / 255.
    return scaled, y_

# 1-hot encoding
def to_categorical(x_, y_):
    """Replace the integer label by its one-hot vector of length 10; the image is passed through.

    Args:
        x_: image tensor, returned unchanged.
        y_: integer class index.

    Returns:
        Tuple ``(x_, one_hot_label)``.
    """
    one_hot_label = tf.one_hot(y_, depth=10)
    return x_, one_hot_label

# Resizing 
def process_images(image, label):
    """Resize the image to 227x227; the label is passed through.

    Args:
        image: image tensor to resize.
        label: label, returned unchanged.

    Returns:
        Tuple ``(resized_image, label)``.
    """
    # Target size is 227x227 (the CIFAR-10 source images are 32x32)
    target_size = (227,227)
    return tf.image.resize(image, target_size), label

# Test pipeline: scale -> one-hot labels -> resize -> batches of a single sample
test_dataset = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(normalize_img)
    .map(to_categorical)
    .map(process_images)
    .batch(1)
)

## Quantization Layer

In [4]:
def Quantization_layer(tensor, Quantization = True,signed = True, word_size = 12, frac_size = 6):
    """Round a tensor to the nearest multiple of ``2**-frac_size``.

    Args:
        tensor: values to quantize.
        Quantization: when False the tensor is returned untouched (bypass).
        signed: accepted but currently unused.
        word_size: accepted but currently unused.
        frac_size: number of fractional bits; sets the rounding step.

    Returns:
        The rounded tensor, or ``tensor`` itself when bypassed.

    Note:
        No overflow handling is performed. Saturation limits would be
        ((1 << (word_size-1)) - 1)/factor and -that-1 for signed words, or
        ((1 << word_size) - 1)/factor and 0 for unsigned words, but they are
        not applied here.
    """
    scale = 2.0**frac_size

    if not Quantization:
        # Simple bypass
        return tensor

    # Quantization, assuming no overflow
    scaled = tensor*scale
    return tf.round(scaled) / scale

## Creating AlexNet Model

In [5]:
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, MaxPool2D, Flatten, Dropout, Lambda


def build_model(input_layer, Quantization = True, signed = True, word_size = 12, frac_size = 6 ):
    """Assemble the AlexNet graph with a quantization step after every operation.

    Every convolution, dense layer, ReLU, batch normalization and the final
    softmax is followed by a ``Lambda`` wrapping ``Quantization_layer``; the
    input is quantized as well. With ``Quantization=False`` those Lambdas act
    as a bypass, so the same topology serves as the reference model.

    Args:
        input_layer: symbolic Keras input the network is built on.
        Quantization: forwarded to ``Quantization_layer`` (False = bypass).
        signed: forwarded to ``Quantization_layer``.
        word_size: forwarded to ``Quantization_layer``.
        frac_size: forwarded to ``Quantization_layer`` (fractional bits).

    Returns:
        Symbolic tensor of the quantized 10-way softmax output.
    """
    # Single keyword dict shared by every quantization Lambda
    quant_kwargs = {'Quantization':Quantization, 'signed':signed, 'word_size':word_size, 'frac_size':frac_size}

    def quantize(x):
        # New Lambda layer applying Quantization_layer to x
        return Lambda(Quantization_layer, arguments = quant_kwargs)(x)

    def quantized_relu(x):
        # quantize -> ReLU -> quantize
        return quantize(tf.keras.activations.relu(quantize(x)))

    def conv_stage(x, filters, kernel_size, strides = (1,1), padding = "valid", pool = False):
        # Conv -> Q -> ReLU -> Q -> BatchNorm -> Q, optionally followed by 3x3/2 max pooling
        x = Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding)(x)
        x = quantized_relu(x)
        x = quantize(BatchNormalization()(x))
        if pool:
            x = MaxPool2D(pool_size=(3,3), strides=(2,2))(x)
        return x

    def dense_stage(x, units):
        # Dense -> Q -> ReLU -> Q -> Dropout(0.5)
        x = quantized_relu(Dense(units)(x))
        return Dropout(0.5)(x)

    # Conv blocks (layers are created in the same order as a straight-line definition)
    net = quantize(input_layer)
    net = conv_stage(net, 96,  (11,11), strides=(4,4), pool=True)
    net = conv_stage(net, 256, (5,5),   padding="same", pool=True)
    net = conv_stage(net, 384, (3,3),   padding="same")
    net = conv_stage(net, 384, (1,1),   padding="same")
    net = conv_stage(net, 256, (1,1),   padding="same", pool=True)

    # Classifier head
    net = Flatten()(net)
    net = dense_stage(net, 4096)
    net = dense_stage(net, 4096)

    # Output layer: Dense(10) -> Q -> softmax -> Q
    scores = quantize(Dense(10)(net))
    return quantize(tf.keras.activations.softmax(scores))

## Loading Quantized model and Non Quantized model 

Con base en el resultado del último ítem del cuaderno "AlexNet Analysis", las entradas/salidas de las capas toman valores que varían entre -80 y 53.5. Aunque los valores máximos y mínimos varían enormemente entre capas, por ahora aplicaremos la misma cuantización a todas las capas.

Considerando un bit para el signo, se necesitan al menos 7 bits para la parte entera ($2^7 = 128 > 80$); se medirá el accuracy de los modelos en función del número de bits destinados a la parte fraccionaria.

In [6]:

# Activation format: 1 sign bit + Int_Bits integer bits + Frac_Bits fractional bits
Frac_Bits = 5
Int_Bits  = 7

# Reference network: quantization layers in bypass mode
input_layer  = tf.keras.Input(shape=(227,227,3))
output_layer = build_model(input_layer, Quantization = False)
AlexNet      = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Same topology with the quantization layers active
Qinput_layer  = tf.keras.Input(shape=(227,227,3))
Qoutput_layer = build_model(
    Qinput_layer,
    Quantization = True,
    word_size    = (Frac_Bits+Int_Bits+1),
    frac_size    = Frac_Bits,
)
QAlexNet = tf.keras.Model(inputs=Qinput_layer, outputs=Qoutput_layer)

## Loading Pretrained Weights

In [7]:
# Loading Weights
cwd = os.getcwd()
Wgt_dir = os.path.join(cwd, 'TrainedWeights', 'Weights')   # <cwd>/TrainedWeights/Weights
# Both networks are initialized from the same stored weights
AlexNet.load_weights(Wgt_dir)
QAlexNet.load_weights(Wgt_dir)

# Visualize AlexNet Architecture
#AlexNet.summary()

# Visualize initialized weights
#AlexNet.weights

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x22b6f736108>

## Quantize weights

Con base en el resultado del último ítem del cuaderno "AlexNet Analysis", los valores de los pesos varían entre -0.16 y 28.7; en este caso, en general, no hay grandes variaciones entre los valores de las distintas capas.

Obs.: vale la pena notar que, dado que el mínimo de los pesos es muy cercano a 0, la cuantización sin signo puede ser una alternativa. Además, salvo una capa de batch normalization con pesos de valores altos (28.7 y otros), los demás pesos se mantienen de baja magnitud (menor a 8 y, en su gran mayoría, menor a 2).

Considerando un bit para el signo, se necesitan al menos 5 bits para la parte entera ($2^5 = 32 > 28.7$); se medirá el accuracy de los modelos en función del número de bits destinados a la parte fraccionaria.

In [8]:
def Quantization(List, Quantization = True, signed = True, word_size = 12, frac_size = 6):
    """Round an array of weights to the nearest multiple of ``2**-frac_size``.

    The result is a NumPy array, which is what ``layer.set_weights`` works
    with. Rounding is half-to-even (``np.round``), the same rule ``tf.round``
    applies.

    Args:
        List: array-like of weight values.
        Quantization: when False the values are returned unrounded (bypass),
            mirroring ``Quantization_layer``.
        signed: accepted but currently unused.
        word_size: accepted but currently unused.
        frac_size: number of fractional bits; sets the rounding step.

    Returns:
        ``numpy.ndarray`` with the same shape as ``List``. A floating-point
        input dtype is preserved.
    """
    values = np.array(List)
    if not Quantization:
        # Simple bypass
        return values

    factor = 2.0**frac_size
    # Quantization, assuming no overflow
    return np.round(values*factor) / factor

# Weight format: 1 sign bit + Int_Bits integer bits + Frac_Bits fractional bits
Frac_Bits = 5
Int_Bits  = 5


for layer in QAlexNet.layers:
    weights = layer.get_weights()
    if not weights:
        # Layer without weights: nothing to quantize
        continue
    # Quantize every array returned by get_weights() and write the result back
    Qweights = list(map(
        lambda itm: Quantization(itm, word_size = (Int_Bits+Frac_Bits+1), frac_size = Frac_Bits),
        weights,
    ))
    layer.set_weights(Qweights)

## Check the general Accuracy

In [9]:
# Compile settings shared by both networks
# ----------------------------------------

# Validation metrics
metrics = ['accuracy']

# Loss: categorical cross-entropy (the labels are one-hot encoded by to_categorical)
loss = tf.keras.losses.CategoricalCrossentropy()

# Adam optimizer and its learning rate
lr = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Compile the reference and the quantized model with identical settings
AlexNet.compile(loss=loss, optimizer=optimizer, metrics=metrics)
QAlexNet.compile(loss=loss, optimizer=optimizer, metrics=metrics)

## Original model

In [10]:
# Evaluate the non-quantized reference model on the test dataset; the output is [loss, accuracy]
AlexNet.evaluate(test_dataset)



[0.6315659527876948, 0.7898]

## 3 bit precision weights && 3 bit precision activations

In [11]:
# Evaluate the quantized model; the output is [loss, accuracy].
# NOTE(review): per the section heading this result presumably comes from a run with
# Frac_Bits = 3 for both activations and weights -- confirm.
QAlexNet.evaluate(test_dataset)



[2.3025851249694824, 0.1]

## 5 bit precision weights && 5 bit precision activations

In [10]:
# Evaluate the quantized model; the output is [loss, accuracy].
# NOTE(review): per the section heading this result presumably comes from a run with
# Frac_Bits = 5 for both activations and weights -- confirm.
QAlexNet.evaluate(test_dataset)



[6.166052489484474, 0.4053]

## 6 bit precision weights && 6 bit precision activations

In [16]:
# Evaluate the quantized model; the output is [loss, accuracy].
# NOTE(review): per the section heading this result presumably comes from a run with
# Frac_Bits = 6 for both activations and weights -- confirm.
QAlexNet.evaluate(test_dataset)



[1.0706737461386249, 0.7398]

## 7 bit precision weights && 7 bit precision activations

In [21]:
# Evaluate the quantized model; the output is [loss, accuracy].
# NOTE(review): per the section heading this result presumably comes from a run with
# Frac_Bits = 7 for both activations and weights -- confirm.
QAlexNet.evaluate(test_dataset)



[0.8646249719636515, 0.7533]

## 8 bit precision weights && 8 bit precision activations

In [26]:
# Evaluate the quantized model; the output is [loss, accuracy].
# NOTE(review): per the section heading this result presumably comes from a run with
# Frac_Bits = 8 for both activations and weights -- confirm.
QAlexNet.evaluate(test_dataset)



[0.6723387817996089, 0.7935]

Se necesitan al menos 6 bits para obtener resultados decentes; a partir de 8 bits se alcanza el accuracy del modelo original.