In [None]:
# Author: Naveen Lalwani
# Script to train Student model on MNIST without distilling any knowledge

import numpy as np
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from tensorflow.contrib import lite
import time

Importing the <B>MNIST</B> dataset from the Tensorflow examples. The images get saved in the folder named "MNIST_data" until otherwise specifed.

In [75]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
testData, testLabels = mnist.test.images, mnist.test.labels

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


The total number of training examples in MNIST dataset is 55,000 has a total of 10 labels. Loading the training data.

In [None]:
numTrain = mnist.train.images.shape[0]
trainingData = mnist.train.images[:numTrain,:]
trainingLabels = mnist.train.labels[:numTrain,:]

numTest = mnist.test.images.shape[0]
testData = mnist.test.images[:numTest,:]
testLabels = mnist.test.labels[:numTest,:]

<B>Building of Single hidden layer with 100 units neural network. </B>

##Defining the training parameters.

In [None]:
learning_rate = 0.01
epochs = 300
batch_size = 256
display_step = 10

##Network Parameters.

In [None]:
n_input = 784
n_classes = 10

In [None]:
# Placeholders
X = tf.placeholder(tf.float32, shape = [None, n_input], name = "X") # Placeholder for Images
Y = tf.placeholder(tf.float32, shape = [None, n_classes], name = "Y") # Placeholder for Labels

##Defining variables to be optimized.

##Defining weights & biases for the three layers.

In [None]:
weights = {
    # Fully Connected Layer 1: 784 input channels, 100 output channels
    'w1' : tf.Variable(tf.random_normal([784, 50]), name = 'w1'),
    # Fully Connected Layer 2: 100 input channels, 10 (number of classes) output channels
    'w2' : tf.Variable(tf.random_normal([50, 10]), name = 'w2')
}
biases = {
    'b1' : tf.Variable(tf.random_normal([50]), name = 'b1'),
    'b2' : tf.Variable(tf.random_normal([10]), name = 'b2')
}

##Defining the model with single hidden layer having 50 hidden units

In [None]:
def model_50(x, weight, bias):
    # Fully Connected Layer 1
    fc1 = tf.add(tf.matmul(x, weight['w1']), bias['b1']) # Linear Function
    fc1 = tf.nn.relu(fc1) # Activation Function
    
    # Ouput Layer
    out = tf.add(tf.matmul(fc1, weight['w2']), bias['b2']) # Output Layer
    
    return out

##Training the model.

In [None]:
# Get probabilities for the input for all the classes
logits = model_50(X, weights, biases)

cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = logits, labels = Y)
loss_op = tf.reduce_mean(cross_entropy)

# Since we have the cost in 'loss_op', variable, we need an optimizer to reduce the cost.
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)

# Minimize the optimization function i.e. minimize the loss.
train_op = optimizer.minimize(loss_op)

# Get correct prediction by getting class with maximum probability and get accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))

# This calculates the classification accuracy by first type-casting the vector of booleans to floats, so that False becomes 0 and True 
# becomes 1, and then calculating the average of these numbers.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#Initializing all the variables & Running tensorflow session.

In [None]:
init = tf.global_variables_initializer()

In [84]:
def next_batch(batch_size, data, labels):
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[: batch_size]
    data_shuffle = [data[i] for i in idx]
    labels_shuffle = [labels[i] for i in idx]
    return np.asarray(data_shuffle), np.asarray(labels_shuffle)


with tf.Session() as sess:
    # Running Initializer
    sess.run(init)
    
    # For saving cost history and accuracy history on batches
    cost_hist, acc_hist = [], []
    for epoch in range(1, epochs + 1):
        batch_x, batch_y = next_batch(batch_size, trainingData, trainingLabels)
        sess.run(train_op, feed_dict = { X : batch_x, Y : batch_y })
        if epoch % display_step == 0:
            # Calculating Loss and Accuracy on the current Epoch
            loss, acc = sess.run([train_op, accuracy], feed_dict = { X : batch_x, Y : batch_y })
            cost_hist.append(loss)
            acc_hist.append(acc)
            print('Epoch ' + str(epoch) + ', Cost: ' + str(loss) + ', Accuracy on batch: ' + str(acc * 100) + ' %')
            print("Train Accuracy after " + str(epoch) + " on training data: ", str(accuracy.eval({X:trainingData, Y:trainingLabels}) * 100) + ' %')
    
    print('-' * 70)
    print('\nOptimization Finished\n')
    print('Now testing accuracy on the complete data, we have:\n')
    print("Training Accuracy:", accuracy.eval({X:trainingData, Y:trainingLabels}))
    start = time.time()
    print("Test Accuracy:", accuracy.eval({X:testData, Y:testLabels}))
    end = time.time()
    print("Inference Time: ", (end-start))
    
    # Saving the full precision model
    converter = lite.TFLiteConverter.from_session(sess, [X], [logits])
    tflite_model = converter.convert()
    open("Student_model.tflite", "wb").write(tflite_model)

Epoch 10, Cost: None, Accuracy on batch: 27.34375 %
Train Accuracy after 10 on training data:  29.730910062789917 %
Epoch 20, Cost: None, Accuracy on batch: 49.21875 %
Train Accuracy after 20 on training data:  53.812724351882935 %
Epoch 30, Cost: None, Accuracy on batch: 63.28125 %
Train Accuracy after 30 on training data:  64.68363404273987 %
Epoch 40, Cost: None, Accuracy on batch: 68.75 %
Train Accuracy after 40 on training data:  70.30909061431885 %
Epoch 50, Cost: None, Accuracy on batch: 73.4375 %
Train Accuracy after 50 on training data:  73.14363718032837 %
Epoch 60, Cost: None, Accuracy on batch: 75.390625 %
Train Accuracy after 60 on training data:  75.28545260429382 %
Epoch 70, Cost: None, Accuracy on batch: 76.5625 %
Train Accuracy after 70 on training data:  76.91272497177124 %
Epoch 80, Cost: None, Accuracy on batch: 74.609375 %
Train Accuracy after 80 on training data:  78.86727452278137 %
Epoch 90, Cost: None, Accuracy on batch: 77.34375 %
Train Accuracy after 90 on tr

In [None]:
sess.close()