In [1]:
# Author: Naveen Lalwani
# Script to train and Quantize baseline model LeNet-5 on MNIST dataset and save it

import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
from tensorflow.contrib import lite
from collections import Counter

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", reshape=False, one_hot=True)
trainingData, trainingLabels = mnist.train.images, mnist.train.labels
testData, testLabels = mnist.test.images, mnist.test.labels

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


###Since, LeNet architecture accepts 32x32x1 as an input but we have the input shape in the format of 28x28x1. Thus, we use numpy's padding to pad the training and test data with zeros to change its shape.

In [3]:
# Padding images with 0s
trainingData = np.pad(trainingData, ((0,0),(2,2),(2,2),(0,0)), 'constant')
testData = np.pad(testData, ((0,0),(2,2),(2,2),(0,0)), 'constant')
X_train, X_labels = trainingData, trainingLabels     
print("Updated Image Shape: {}".format(trainingData[0].shape))

Updated Image Shape: (32, 32, 1)


In [4]:
# Network Parameters
learning_rate = 0.001
epochs = 10
batch_size = 128
display_step = 1
n_classes = 10

In [5]:
# Placeholders
X = tf.placeholder(tf.float32, shape = [None, 32, 32, 1], name = "X") # Placeholder for Images
Y = tf.placeholder(tf.float32, shape = [None, n_classes], name = "Y") # Placeholder for Labels

In [6]:
def LeNet_5(x):    
    '''
    LeNet-5 is a 5 layer neural network that takes in input of size 32x32
        
    The first layer is a convolution layer that has 6 filters with kernel size 5x5
    After convolution, the output will go through ReLU activation function.
    After ReLU, the output activaations are down-sampled by a max pooling layer with kernel size 2x2

    The second layer is a convolution layer that has 16 filters with kernel size 5x5
    After convolution, the output will go through ReLU activation function.
    After ReLU, the output activaations are down-sampled by a max pooling layer with kernel size 2x2

    The third layer is a fully-connected layer with 120 hidden units.
    After fully-connected, the output will go through ReLU activation function.

    The fourth layer is a fully-connected layer with 84 hidden units.
    After fully-connected, the output will go through ReLU activation function.

    The last layer is a fully-connected layer that ouputs 10 units (10 classes for MNIST)
   
   '''
    # Arguments used for tf.truncated_normal, randomly defines variables for the weights and biases for each layer
    mu = 0
    sigma = 0.1    
    
    weights = {
        # The shape of the filter weight is (height, width, input_depth, output_depth)
        'conv1': tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6), mean = mu, stddev = sigma)),
        'conv2': tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean = mu, stddev = sigma)),
        'fl1': tf.Variable(tf.truncated_normal(shape=(5 * 5 * 16, 120), mean = mu, stddev = sigma)),
        'fl2': tf.Variable(tf.truncated_normal(shape=(120, 84), mean = mu, stddev = sigma)),
        'out': tf.Variable(tf.truncated_normal(shape=(84, n_classes), mean = mu, stddev = sigma))
    }

    biases = {
        # The shape of the filter bias is (output_depth,)
        'conv1': tf.Variable(tf.zeros(6)),
        'conv2': tf.Variable(tf.zeros(16)),
        'fl1': tf.Variable(tf.zeros(120)),
        'fl2': tf.Variable(tf.zeros(84)),
        'out': tf.Variable(tf.zeros(n_classes))
    }
    
    b_min1 = tf.reduce_min(biases['conv1'])
    b_max1 = tf.reduce_max(biases['conv1'])
    b_fake_quant1 = tf.fake_quant_with_min_max_vars(biases['conv1'], 
                    min=b_min1, 
                    max=b_max1, 
                    narrow_range=True,
                    name="b1")
    
    b_min2 = tf.reduce_min(biases['conv2'])
    b_max2 = tf.reduce_max(biases['conv2'])
    b_fake_quant2 = tf.fake_quant_with_min_max_vars(biases['conv2'], 
                    min=b_min2, 
                    max=b_max2, 
                    narrow_range=True,
                    name="b2")
    
    b_min3 = tf.reduce_min(biases['fl1'])
    b_max3 = tf.reduce_max(biases['fl1'])
    b_fake_quant3 = tf.fake_quant_with_min_max_vars(biases['fl1'], 
                    min=b_min3, 
                    max=b_max3, 
                    narrow_range=True,
                    name="b3")
    
    b_min4 = tf.reduce_min(biases['fl2'])
    b_max4 = tf.reduce_max(biases['fl2'])
    b_fake_quant4 = tf.fake_quant_with_min_max_vars(biases['fl2'], 
                    min=b_min4, 
                    max=b_max4, 
                    narrow_range=True,
                    name="b4")
    
    b_min5 = tf.reduce_min(biases['out'])
    b_max5 = tf.reduce_max(biases['out'])
    b_fake_quant5 = tf.fake_quant_with_min_max_vars(biases['out'], 
                    min=b_min5, 
                    max=b_max5, 
                    narrow_range=True,
                    name="b5")
    
    w_min1 = tf.reduce_min(weights['conv1'])
    w_max1 = tf.reduce_max(weights['conv1'])
    w_fake_quant1 = tf.fake_quant_with_min_max_vars(weights['conv1'], 
                    min=w_min1, 
                    max=w_max1, 
                    narrow_range=True,
                    name="w1")
    
    w_min2 = tf.reduce_min(weights['conv2'])
    w_max2 = tf.reduce_max(weights['conv2'])
    w_fake_quant2 = tf.fake_quant_with_min_max_vars(weights['conv2'], 
                    min=w_min2, 
                    max=w_max2, 
                    narrow_range=True,
                    name="w2")
    
    w_min3 = tf.reduce_min(weights['fl1'])
    w_max3 = tf.reduce_max(weights['fl1'])
    w_fake_quant2 = tf.fake_quant_with_min_max_vars(weights['fl1'], 
                    min=w_min3, 
                    max=w_max3, 
                    narrow_range=True,
                    name="w3")
    
    w_min4 = tf.reduce_min(weights['fl2'])
    w_max4 = tf.reduce_max(weights['fl2'])
    w_fake_quant4 = tf.fake_quant_with_min_max_vars(weights['fl2'], 
                    min=w_min4, 
                    max=w_max4, 
                    narrow_range=True,
                    name="w4")
    
    w_min5 = tf.reduce_min(weights['out'])
    w_max5 = tf.reduce_max(weights['out'])
    w_fake_quant5 = tf.fake_quant_with_min_max_vars(weights['out'], 
                    min=w_min5, 
                    max=w_max5, 
                    narrow_range=True,
                    name="w5")
    
    # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6.
    conv1 = tf.nn.conv2d(x, weights['conv1'], strides=[1, 1, 1, 1], padding='VALID')
    conv1 = tf.nn.bias_add(conv1, biases['conv1'])
    # Activation.
    conv1 = tf.nn.relu(conv1)
    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.avg_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    
    # Layer 2: Convolutional. Output = 10x10x16.
    conv2 = tf.nn.conv2d(conv1, weights['conv2'], strides=[1, 1, 1, 1], padding='VALID')
    conv2 = tf.nn.bias_add(conv2, biases['conv2'])
    # Activation.
    conv2 = tf.nn.relu(conv2)
    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.avg_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Flatten. Input = 5x5x16. Output = 400.
    fl0 = tf.contrib.layers.flatten(conv2)
    
    # Layer 3: Fully Connected. Input = 400. Output = 120.
    fl1 = tf.add(tf.matmul(fl0, weights['fl1']), biases['fl1'])
    # Activation.
    fl1 = tf.nn.relu(fl1)
    
    # Layer 4: Fully Connected. Input = 120. Output = 84.
    fl2 = tf.add(tf.matmul(fl1, weights['fl2']), biases['fl2'])
    # Activation.
    fl2 = tf.nn.relu(fl2)

    # Layer 5: Fully Connected. Input = 84. Output = 10.
    logits = tf.add(tf.matmul(fl2, weights['out']), biases['out'])
                 
    return logits

# Training the model 

In [7]:
logits = LeNet_5(X)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = logits, labels = Y)
loss_op = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
train_op = optimizer.minimize(loss_op)

# Get correct prediction by getting class with maximum probability and get accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))

# This calculates the classification accuracy by first type-casting the vector of booleans to floats, so that False becomes 0 and True 
# becomes 1, and then calculating the average of these numbers.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.


In [8]:
init = tf.global_variables_initializer()

In [9]:
with tf.Session() as sess:
    sess.run(init)
    num_examples = len(trainingData)
    acc_hist = []
    cost_hist = []
    batch_x = trainingData
    batch_y = trainingLabels
    for i in range(1, epochs + 1):
        for offset in range(0, num_examples, batch_size):
            end = offset + batch_size
            batch_x, batch_y = trainingData[offset:end], trainingLabels[offset:end]
            sess.run(train_op, feed_dict={X: batch_x, Y: batch_y}) 
        loss, acc = sess.run([train_op, accuracy], feed_dict = {X: batch_x, Y: batch_y})
        loss = loss
        cost_hist.append(loss)
        acc_hist.append(acc)
        print('Epoch ' + str(i) + ', Cost: ' + str(loss) + ', Accuracy on batch: ' + str(acc * 100) + ' %')
        total_acc1 = accuracy.eval({ X : X_train[0:27499], Y : X_labels[0:27499]}) / 2
        total_acc2 = accuracy.eval({ X : X_train[27500:55000], Y : X_labels[27500:55000]}) / 2              
        print("Train Accuracy after " + str(i) + " on training data: ", str((total_acc1 + total_acc2) * 100) + ' %')
    print('-' * 70)
    print('\nOptimization Finished\n')
    print('Now testing accuracy on the complete data, we have:\n')
    print("Test Accuracy: ", str(accuracy.eval({ X : testData, Y : testLabels}) * 100) + ' %')
    
    # Saving the full precision model
    converter = lite.TFLiteConverter.from_session(sess, [X], [logits])
    tflite_model = converter.convert()
    open("LeNet5_float32_model_rerun.tflite", "wb").write(tflite_model)
    
    # Saving the quantized model
    converter = lite.TFLiteConverter.from_session(sess, [X], [logits])
    converter.inference_input_type = tf.float32
    converter.default_ranges_stats = (0., 6.)
    input_mean = 128
    input_stddev = 255
    input_arrays = converter.get_input_arrays()
    converter.quantized_input_stats = {input_arrays[0] : (input_mean, input_stddev)}
    converter.post_training_quantize = True
    tflite_model = converter.convert()
    open("LeNet5_int8_model_rerun.tflite", "wb").write(tflite_model)

Epoch 1, Cost: None, Accuracy on batch: 97.72727489471436 %
Train Accuracy after 1 on training data:  93.51260364055634 %
Epoch 2, Cost: None, Accuracy on batch: 97.72727489471436 %
Train Accuracy after 2 on training data:  96.35447561740875 %
Epoch 3, Cost: None, Accuracy on batch: 98.86363744735718 %
Train Accuracy after 3 on training data:  97.32358455657959 %
Epoch 4, Cost: None, Accuracy on batch: 98.86363744735718 %
Train Accuracy after 4 on training data:  97.87996113300323 %
Epoch 5, Cost: None, Accuracy on batch: 98.86363744735718 %
Train Accuracy after 5 on training data:  98.29815030097961 %
Epoch 6, Cost: None, Accuracy on batch: 98.86363744735718 %
Train Accuracy after 6 on training data:  98.58724474906921 %
Epoch 7, Cost: None, Accuracy on batch: 98.86363744735718 %
Train Accuracy after 7 on training data:  98.79088699817657 %
Epoch 8, Cost: None, Accuracy on batch: 98.86363744735718 %
Train Accuracy after 8 on training data:  98.881796002388 %
Epoch 9, Cost: None, Accur

In [10]:
sess.close()