In [None]:
# Author: Naveen Lalwani
# Script to train student model on CIFAR-10 dataset without knowledge distillation

import numpy as np
import tensorflow as tf
# Let TensorFlow grow its GPU memory allocation on demand instead of reserving it up front.
# Intended to get rid of the error: cuDNN failed to initialize
# NOTE(review): `config` only has an effect when it is handed to the session that runs the
# graph, e.g. tf.Session(config = config) - confirm the session is created that way.

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from tensorflow.contrib import lite
from keras.utils import np_utils
import time

In [None]:
# --- Optimisation settings ---
learning_rate = 1e-3    # step size handed to the optimizer
batch_size = 32         # number of samples drawn for each training step
epochs = 4000           # upper bound of the training loop; each iteration uses one mini-batch
display_step = 50       # report loss / accuracy every `display_step` iterations

# --- Problem dimensions ---
n_input = 32 * 32 * 3   # flattened image length (3072 values per image)
num_classes = 10        # number of target classes

Importing the <B>CIFAR-10</B> dataset from Keras.

In [None]:
# Load the CIFAR-10 train / test splits through the Keras dataset helper
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Enabling One Hot Encoding: integer class ids -> vectors of length num_classes
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Changing input image datatype to float and normalizing the data by dividing by 255.
# A new array is assigned (no in-place `/=`), so each statement reads as one clear step.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Flatten every image into a vector of n_input values so it matches the input placeholder.
# -1 lets NumPy infer the number of samples instead of hard-coding 50000 / 10000.
x_train = x_train.reshape([-1, n_input])
x_test = x_test.reshape([-1, n_input])

<B>Building a neural network with a single hidden layer of 50 units.</B>

In [None]:
# Graph inputs. The leading `None` dimension leaves the number of rows fed per run open.
# X receives the flattened images, Y receives the labels (one row of num_classes values each).
X = tf.placeholder(dtype = tf.float32, shape = (None, n_input), name = "X")
Y = tf.placeholder(dtype = tf.float32, shape = (None, num_classes), name = "Y")

#Defining variables to be optimized

## Defining weights & biases for the two fully connected layers (hidden and output).

In [None]:
# Width of the single hidden layer
n_hidden = 50

# Trainable parameters, all drawn from tf.random_normal at initialisation.
# Layer sizes are taken from n_input / n_hidden / num_classes so they stay in sync
# with the placeholders instead of repeating the literals 3072 / 50 / 10.
weights = {
    # Fully Connected Layer 1: n_input (3072) inputs, n_hidden (50) outputs
    'w1' : tf.Variable(tf.random_normal([n_input, n_hidden]), name = 'w1'),
    # Fully Connected Layer 2: n_hidden (50) inputs, num_classes (10) outputs
    'w2' : tf.Variable(tf.random_normal([n_hidden, num_classes]), name = 'w2')
}
biases = {
    # One bias per hidden unit
    'b1' : tf.Variable(tf.random_normal([n_hidden]), name = 'b1'),
    # One bias per output class
    'b2' : tf.Variable(tf.random_normal([num_classes]), name = 'b2')
}

In [None]:
def model_50(x, weight, bias):
    """Forward pass of the network: one ReLU hidden layer followed by a linear output layer.

    Args:
        x: input tensor that is matrix-multiplied with weight['w1'].
        weight: mapping that provides the entries 'w1' and 'w2'.
        bias: mapping that provides the entries 'b1' and 'b2'.

    Returns:
        The output-layer tensor. No softmax is applied here, so these are raw scores.
    """
    # Hidden layer: linear function followed by the ReLU activation
    hidden_linear = tf.add(tf.matmul(x, weight['w1']), bias['b1'])
    hidden = tf.nn.relu(hidden_linear)

    # Output layer: linear function only
    return tf.add(tf.matmul(hidden, weight['w2']), bias['b2'])

<B> Training the model </B>

In [None]:
# Forward pass: raw class scores (logits, not probabilities) for every input row
logits = model_50(X, weights, biases)

# Softmax cross-entropy between the logits and the labels, one value per example ...
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels = Y, logits = logits)
# ... averaged into the scalar cost that is minimized
loss_op = tf.reduce_mean(cross_entropy)

# Adam optimizer with the configured learning rate; `train_op` performs one update step
# that reduces `loss_op`.
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
train_op = optimizer.minimize(loss_op)

# A prediction is correct when the highest-scoring class equals the class marked in the label
predicted_class = tf.argmax(logits, axis = 1)
true_class = tf.argmax(Y, axis = 1)
correct_pred = tf.equal(predicted_class, true_class)

# Classification accuracy: cast the booleans to floats (False -> 0, True -> 1) and
# take the average of these numbers.
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#Initializing all the variables & Running tensorflow session

In [None]:
# Op that initializes all global variables of the graph (here the weights and biases);
# it is executed with sess.run(init) before training starts.
init = tf.global_variables_initializer()

In [137]:
def next_batch(batch_size, data, labels):
    """Draw a random mini-batch of matching samples and labels.

    The indices 0 .. len(data) - 1 are shuffled and the first `batch_size` of them
    are used, so a batch never contains the same sample twice. If `batch_size`
    exceeds len(data), every sample is returned once in random order.

    Args:
        batch_size: number of samples to draw.
        data: array-like of samples, indexed along its first axis.
        labels: array-like of labels, aligned with `data` along the first axis.

    Returns:
        Tuple (batch_data, batch_labels) of NumPy arrays holding the selected rows.
    """
    idx = np.arange(0, len(data))
    np.random.shuffle(idx)
    idx = idx[: batch_size]
    # Vectorised row selection instead of building Python lists element by element;
    # np.asarray is a no-op for inputs that already are NumPy arrays.
    return np.asarray(data)[idx], np.asarray(labels)[idx]


# Train the model, report progress, evaluate it and export it as a TFLite file.
# The session is created with `config` so the GPU allow_growth option defined next to
# the imports is actually applied.
with tf.Session(config = config) as sess:
    # Running Initializer
    sess.run(init)

    # For saving cost history and accuracy history on batches
    cost_hist, acc_hist = [], []
    # NOTE: each "epoch" of this loop is a single optimisation step on one random mini-batch
    for epoch in range(1, epochs + 1):
        batch_x, batch_y = next_batch(batch_size, x_train, y_train)
        sess.run(train_op, feed_dict = { X : batch_x, Y : batch_y })
        if epoch % display_step == 0:
            # Calculating Loss and Accuracy on the current batch.
            # `loss_op` is fetched here (not `train_op`): fetching `train_op` yields None
            # as the "loss" and silently performs an additional training step.
            loss, acc = sess.run([loss_op, accuracy], feed_dict = { X : batch_x, Y : batch_y })
            cost_hist.append(loss)
            acc_hist.append(acc)
            print('Epoch ' + str(epoch) + ', Cost: ' + str(loss) + ', Accuracy on batch: ' + str(acc * 100) + ' %')
            print("Train Accuracy after " + str(epoch) + " on training data: ", str(accuracy.eval({X: x_train, Y: y_train}) * 100) + ' %')

    print('-' * 70)
    print('\nOptimization Finished\n')
    print('Now testing accuracy on the complete data, we have:\n')
    print("Training Accuracy:", accuracy.eval({X:x_train, Y: y_train}))
    print("Test Accuracy:", accuracy.eval({X:x_test, Y: y_test}))

    # Saving the full precision model
    converter = lite.TFLiteConverter.from_session(sess, [X], [logits])
    tflite_model = converter.convert()
    # The context manager guarantees the file is flushed and closed after writing
    with open("Student_model_CIFAR10.tflite", "wb") as model_file:
        model_file.write(tflite_model)

In [None]:
# NOTE(review): `sess` was opened by the `with tf.Session(...) as sess:` block above, which
# presumably already closes it on exit, making this call redundant - confirm before removing.
sess.close()