In [110]:
# Author: Naveen Lalwani
# Script to train and Quantize baseline model LeNet-5 on CIFAR-10 dataset and save it

import numpy as np
import tensorflow as tf
from collections import Counter
from tensorflow.contrib import lite
from keras.utils import np_utils
from sklearn.utils import shuffle

In [111]:
# 32 examples in a mini-batch, smaller batch size means more updates in one epoch
batch_size = 64 
num_classes = 10
epochs = 100
learning_rate = 0.00025
display_step = 10

## Downloading CIFAR-10 Dataset and preprocessing 

In [112]:
(x_train, y_train), (x_test, y_test) =  tf.keras.datasets.cifar10.load_data()

# Enabling One Hot Encoding
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Changing input image datatype to float
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Normalizaig data
x_train  /= 255
x_test /= 255

In [113]:
# Placeholders
X = tf.placeholder(tf.float32, shape = [None, 32, 32, 3], name = "X") # Placeholder for Images 32 X 32 size with 3 RGB channels
Y = tf.placeholder(tf.float32, shape = [None, 10], name = "Y") # Placeholder for Labels

In [114]:
def LeNet(x):    
    '''
    LeNet-5 is a 5 layer neural network that takes in input of size 32x32
        
    The first layer is a convolution layer that has 6 filters with kernel size 5x5
    After convolution, the output will go through ReLU activation function.
    After ReLU, the output activaations are down-sampled by a max pooling layer with kernel size 2x2

    The second layer is a convolution layer that has 16 filters with kernel size 5x5
    After convolution, the output will go through ReLU activation function.
    After ReLU, the output activaations are down-sampled by a max pooling layer with kernel size 2x2

    The third layer is a fully-connected layer with 120 hidden units.
    After fully-connected, the output will go through ReLU activation function.

    The fourth layer is a fully-connected layer with 84 hidden units.
    After fully-connected, the output will go through ReLU activation function.

    The last layer is a fully-connected layer that ouputs 10 units (10 classes for MNIST)
   
   '''
    # Arguments used for tf.truncated_normal, randomly defines variables for the weights and biases for each layer
    mu = 0
    sigma = 0.1    
    
    weights = {
        # The shape of the filter weight is (height, width, input_depth, output_depth)
        'conv1': tf.Variable(tf.truncated_normal(shape=(5, 5, 3, 6), mean = mu, stddev = sigma)),
        'conv2': tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean = mu, stddev = sigma)),
        'fl1': tf.Variable(tf.truncated_normal(shape=(5 * 5 * 16, 120), mean = mu, stddev = sigma)),
        'fl2': tf.Variable(tf.truncated_normal(shape=(120, 84), mean = mu, stddev = sigma)),
        'out': tf.Variable(tf.truncated_normal(shape=(84, num_classes), mean = mu, stddev = sigma))
    }

    biases = {
        # The shape of the filter bias is (output_depth,)
        'conv1': tf.Variable(tf.zeros(6)),
        'conv2': tf.Variable(tf.zeros(16)),
        'fl1': tf.Variable(tf.zeros(120)),
        'fl2': tf.Variable(tf.zeros(84)),
        'out': tf.Variable(tf.zeros(num_classes))
    }
    
    b_min1 = tf.reduce_min(biases['conv1'])
    b_max1 = tf.reduce_max(biases['conv1'])
    b_fake_quant1 = tf.fake_quant_with_min_max_vars(biases['conv1'], 
                    min=b_min1, 
                    max=b_max1, 
                    narrow_range=True,
                    name="b1")
    
    b_min2 = tf.reduce_min(biases['conv2'])
    b_max2 = tf.reduce_max(biases['conv2'])
    b_fake_quant2 = tf.fake_quant_with_min_max_vars(biases['conv2'], 
                    min=b_min2, 
                    max=b_max2, 
                    narrow_range=True,
                    name="b2")
    
    b_min3 = tf.reduce_min(biases['fl1'])
    b_max3 = tf.reduce_max(biases['fl1'])
    b_fake_quant3 = tf.fake_quant_with_min_max_vars(biases['fl1'], 
                    min=b_min3, 
                    max=b_max3, 
                    narrow_range=True,
                    name="b3")
    
    b_min4 = tf.reduce_min(biases['fl2'])
    b_max4 = tf.reduce_max(biases['fl2'])
    b_fake_quant4 = tf.fake_quant_with_min_max_vars(biases['fl2'], 
                    min=b_min4, 
                    max=b_max4, 
                    narrow_range=True,
                    name="b4")
    
    b_min5 = tf.reduce_min(biases['out'])
    b_max5 = tf.reduce_max(biases['out'])
    b_fake_quant5 = tf.fake_quant_with_min_max_vars(biases['out'], 
                    min=b_min5, 
                    max=b_max5, 
                    narrow_range=True,
                    name="b5")
    
    w_min1 = tf.reduce_min(weights['conv1'])
    w_max1 = tf.reduce_max(weights['conv1'])
    w_fake_quant1 = tf.fake_quant_with_min_max_vars(weights['conv1'], 
                    min=w_min1, 
                    max=w_max1, 
                    narrow_range=True,
                    name="w1")
    
    w_min2 = tf.reduce_min(weights['conv2'])
    w_max2 = tf.reduce_max(weights['conv2'])
    w_fake_quant2 = tf.fake_quant_with_min_max_vars(weights['conv2'], 
                    min=w_min2, 
                    max=w_max2, 
                    narrow_range=True,
                    name="w2")
    
    w_min3 = tf.reduce_min(weights['fl1'])
    w_max3 = tf.reduce_max(weights['fl1'])
    w_fake_quant2 = tf.fake_quant_with_min_max_vars(weights['fl1'], 
                    min=w_min3, 
                    max=w_max3, 
                    narrow_range=True,
                    name="w3")
    
    w_min4 = tf.reduce_min(weights['fl2'])
    w_max4 = tf.reduce_max(weights['fl2'])
    w_fake_quant4 = tf.fake_quant_with_min_max_vars(weights['fl2'], 
                    min=w_min4, 
                    max=w_max4, 
                    narrow_range=True,
                    name="w4")
    
    w_min5 = tf.reduce_min(weights['out'])
    w_max5 = tf.reduce_max(weights['out'])
    w_fake_quant5 = tf.fake_quant_with_min_max_vars(weights['out'], 
                    min=w_min5, 
                    max=w_max5, 
                    narrow_range=True,
                    name="w5")


    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    conv1 = tf.nn.conv2d(x, weights['conv1'], strides=[1, 1, 1, 1], padding='VALID')
    conv1 = tf.nn.bias_add(conv1, biases['conv1'])
    # Activation.
    conv1 = tf.nn.relu(conv1)
    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.avg_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    
    # Layer 2: Convolutional. Output = 10x10x16.
    conv2 = tf.nn.conv2d(conv1, weights['conv2'], strides=[1, 1, 1, 1], padding='VALID')
    conv2 = tf.nn.bias_add(conv2, biases['conv2'])
    # Activation.
    conv2 = tf.nn.relu(conv2)
    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.avg_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Flatten. Input = 5x5x16. Output = 400.
    fl0 = tf.contrib.layers.flatten(conv2)
    
    # Layer 3: Fully Connected. Input = 400. Output = 120.
    fl1 = tf.add(tf.matmul(fl0, weights['fl1']), biases['fl1'])
    # Activation.
    fl1 = tf.nn.relu(fl1)
    
    # Layer 4: Fully Connected. Input = 120. Output = 84.
    fl2 = tf.add(tf.matmul(fl1, weights['fl2']), biases['fl2'])
    # Activation.
    fl2 = tf.nn.relu(fl2)

    # Layer 5: Fully Connected. Input = 84. Output = 10.
    logits = tf.add(tf.matmul(fl2, weights['out']), biases['out'])
                 
    return logits

In [115]:
logits = LeNet(X)

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = logits, labels = Y)
loss_op = tf.reduce_mean(cross_entropy)

# Using ADAM optimization
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
train_op = optimizer.minimize(loss_op)

# Get correct prediction by getting class with maximum probability and get accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))

# This calculates the classification accuracy by first type-casting the vector of booleans to floats, so that False becomes 0 and True 
# becomes 1, and then calculating the average of these numbers.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [116]:
init = tf.global_variables_initializer()

In [117]:
# Set to use GPU for training Convolution layers
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    sess.run(init)
    num_examples = len(x_train)
    acc_hist = []
    cost_hist = []
    batch_x = x_train
    batch_y = y_train
    print("Training------------------")
    for i in range(1, epochs + 1):
        x_train, y_train = shuffle(x_train, y_train)
        for offset in range(0, num_examples, batch_size):
            end = offset + batch_size
            batch_x, batch_y = x_train[offset:end], y_train[offset:end]
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y}) 
        if (i % display_step == 0):
            loss, acc = sess.run([train_op, accuracy], feed_dict = {X: batch_x, Y: batch_y})
            loss = loss
            cost_hist.append(loss)
            acc_hist.append(acc)
            print('Epoch ' + str(i) + ', Cost: ' + str(loss) + ', Accuracy on batch: ' + str(acc * 100) + ' %')
            print("Test Accuracy: ", str(accuracy.eval({ X : x_test, Y : y_test}) * 100) + ' %')
    print('-' * 70)
    print('\nOptimization Finished\n')
    print('Now testing accuracy on the complete data, we have:\n')
    acc1 = 0
    for i in range(0, num_examples, 5000):
        end = i + 5000
        acc1 = acc1 + accuracy.eval({ X : x_train[i:end], Y : y_train[i:end]})
    print("Train Accuracy: ", str((acc1 / 10) * 100) + ' %')
    print("Test Accuracy: ", str(accuracy.eval({ X : x_test, Y : y_test}) * 100) + ' %')
    
    # Saving the full precision model
    converter = lite.TFLiteConverter.from_session(sess, [X], [logits])
    tflite_model = converter.convert()
    open("LeNet5_float32_model_CIFAR-10_2.tflite", "wb").write(tflite_model)
    
    # Saving the quantized model
    converter = lite.TFLiteConverter.from_session(sess, [X], [logits])
    converter.inference_input_type = tf.float32
    converter.default_ranges_stats = (0., 6.)
    input_mean = 128
    input_stddev = 255
    input_arrays = converter.get_input_arrays()
    converter.quantized_input_stats = {input_arrays[0] : (input_mean, input_stddev)}
    converter.post_training_quantize = True
    tflite_model = converter.convert()
    open("Lenet5_int8_model_CIFAR-10_2.tflite", "wb").write(tflite_model)

Training------------------
Epoch 10, Cost: None, Accuracy on batch: 62.5 %
Test Accuracy:  52.95000076293945 %
Epoch 20, Cost: None, Accuracy on batch: 62.5 %
Test Accuracy:  56.36000037193298 %
Epoch 30, Cost: None, Accuracy on batch: 50.0 %
Test Accuracy:  59.92000102996826 %
Epoch 40, Cost: None, Accuracy on batch: 56.25 %
Test Accuracy:  61.37999892234802 %
Epoch 50, Cost: None, Accuracy on batch: 62.5 %
Test Accuracy:  61.62999868392944 %
Epoch 60, Cost: None, Accuracy on batch: 68.75 %
Test Accuracy:  61.86000108718872 %
Epoch 70, Cost: None, Accuracy on batch: 68.75 %
Test Accuracy:  62.529999017715454 %
Epoch 80, Cost: None, Accuracy on batch: 87.5 %
Test Accuracy:  62.41999864578247 %
Epoch 90, Cost: None, Accuracy on batch: 87.5 %
Test Accuracy:  63.099998235702515 %
Epoch 100, Cost: None, Accuracy on batch: 87.5 %
Test Accuracy:  61.29999756813049 %
----------------------------------------------------------------------

Optimization Finished

Now testing accuracy on the comp