In [1]:
# Imports are done here:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [2]:
# Load the cleaned dataset that an earlier step pickled to disk.
# NOTE(review): pickle.load can execute arbitrary code, so only point this at a file you trust.
# NOTE(review): the path below is an absolute, machine-specific location; adjust it for your environment.
cleaned_dataset_path = '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/MNIST_ImageClassification/DataPreparation/dataset_cleaned.p'

with open(cleaned_dataset_path, 'rb') as f:
    fnl_dataset = pickle.load(f)

# The pickle holds a dict with one images array and one labels array per split;
# unpack them once the file handle has been released.
training_dataset = fnl_dataset['training_dataset']
training_labels = fnl_dataset['training_labels']
test_dataset = fnl_dataset['test_dataset']
test_labels = fnl_dataset['test_labels']
crossvalid_dataset = fnl_dataset['crossvalid_dataset']
crossvalid_labels = fnl_dataset['crossvalid_labels']

# Sanity-check the shapes of every split.
print('Training:', training_dataset.shape, training_labels.shape)
print('Cross Validation:', crossvalid_dataset.shape, crossvalid_labels.shape)
print('Testing:', test_dataset.shape, test_labels.shape)

Training: (200000, 28, 28) (200000,)
Cross Validation: (9810, 28, 28) (9810,)
Testing: (7709, 28, 28) (7709,)


In [6]:
image_size = 28
no_of_labels = 10
no_input_units = image_size * image_size


def reshape_data(dataset, labels, sample_size=None):
    """Flatten every image into one row and one-hot encode the labels.

    TensorFlow takes the labels as one-hot vectors: label 0 (the letter A)
    becomes [1.0, 0, 0, 0, 0, 0, 0, 0, 0, 0], label 1 (the letter B) becomes
    [0, 1.0, 0, 0, 0, 0, 0, 0, 0, 0], and so on.

    Args:
        dataset: numpy array whose first axis indexes the samples and whose
            remaining axes hold `no_input_units` values per sample.
        labels: 1-D numpy array of integer class ids, one per sample.
        sample_size: optional number of leading samples to keep. `None`
            (or 0) keeps everything; a value larger than the dataset keeps
            every available sample.

    Returns:
        A `(flat_dataset, one_hot_labels)` tuple of shapes
        `(n, no_input_units)` and `(n, no_of_labels)`; the labels are float32.
    """
    if sample_size:
        # Truncate BOTH arrays: slicing only the images would leave the
        # returned rows and labels with different lengths.
        dataset = dataset[:sample_size]
        labels = labels[:sample_size]

    # One image per row. Using len(dataset) instead of sample_size keeps the
    # reshape valid even when fewer than sample_size samples are available.
    flat_dataset = dataset.reshape(len(dataset), no_input_units)

    # Broadcasting a column of labels against [0 .. no_of_labels-1] yields a
    # boolean one-hot matrix: 0 -> [1.0, 0.0, 0.0 ...], 1 -> [0.0, 1.0, 0.0 ...]
    one_hot_labels = (np.arange(no_of_labels) == labels[:, None]).astype(np.float32)
    return flat_dataset, one_hot_labels

# Flatten every split so that one image occupies one row, and one-hot encode its labels.
training_dataset_, training_labels_ = reshape_data(training_dataset, training_labels)
crossvalid_dataset_, crossvalid_labels_ = reshape_data(crossvalid_dataset, crossvalid_labels)
test_dataset_, test_labels_ = reshape_data(test_dataset, test_labels)

# Report the resulting shapes, one line per split.
for split_name, split_data, split_labels in (
        ('Training set', training_dataset_, training_labels_),
        ('Cross Validation set', crossvalid_dataset_, crossvalid_labels_),
        ('Test set', test_dataset_, test_labels_)):
    print(split_name, split_data.shape, split_labels.shape)

Training set (200000, 784) (200000, 10)
Cross Validation set (9810, 784) (9810, 10)
Test set (7709, 784) (7709, 10)


In [7]:
# Build the TensorFlow graph: a single linear layer followed by a softmax.
learning_rate = 0.5
momentum = 0.9
graph = tf.Graph()
batch_size = 128

with graph.as_default():
    # The training data goes through placeholders because a different
    # minibatch is fed in at every training step.
    tf_training_dataset = tf.placeholder(tf.float32, shape=(batch_size, no_input_units))
    tf_training_labels = tf.placeholder(tf.float32, shape=(batch_size, no_of_labels))
    # The cross-validation and test data never change, so they are loaded
    # into the graph once as constants.
    tf_crossvalid_dataset = tf.constant(crossvalid_dataset_)
    tf_crossvalid_labels = tf.constant(crossvalid_labels_)
    tf_test_dataset = tf.constant(test_dataset_)
    tf_test_labels = tf.constant(test_labels_)

    # Weight initialization: the weights are drawn at random from a truncated
    # normal distribution, one weight per (pixel, output label) pair.
    weight_matrix = tf.Variable(tf.truncated_normal([no_input_units, no_of_labels]))
    # One bias term per output label, starting at zero.
    biases = tf.Variable(tf.zeros([no_of_labels]))

    # Linear model: multiply the inputs by the weights and add the biases.
    logits = tf.matmul(tf_training_dataset, weight_matrix) + biases

    # Softmax followed by cross-entropy, computed by a single TensorFlow op and
    # averaged over the minibatch. The arguments are passed by keyword because
    # TensorFlow >= 1.0 rejects positional arguments for this function (the
    # argument order was changed); keywords work on older releases as well.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_training_labels, logits=logits))

    # Optimizer that searches for the minimum of the loss. Plain gradient
    # descent is kept below as an alternative to the momentum optimizer.
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           momentum,
                                           use_locking=False,
                                           name='Momentum',
                                           use_nesterov=False).minimize(loss)

    # Predictions (class probabilities) for the current minibatch and for the
    # fixed cross-validation and test sets, used later to measure accuracy.
    training_prediction = tf.nn.softmax(logits)
    crossvalid_prediction = tf.nn.softmax(tf.matmul(tf_crossvalid_dataset, weight_matrix) + biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test_dataset, weight_matrix) + biases)

In [14]:
# Here we start the session for the graph built in the section above. Training uses the mini-batch
# stochastic method for calculating the gradient.
# NOTE: despite its name, `epochs` is the number of minibatch steps (the training loop feeds one batch
# per iteration), not the number of full passes over the training set.
epochs = 3001

def accuracy(predictions, labels):
    """Return the percentage of rows whose predicted class matches the label.

    Both arguments are 2-D arrays with one row per sample; the class of a row
    is the index of its largest entry (so one-hot labels and probability
    vectors are compared by their argmax).
    """
    predicted_classes = np.argmax(predictions, 1)
    expected_classes = np.argmax(labels, 1)
    correct_count = np.sum(predicted_classes == expected_classes)
    return 100.0 * correct_count / predictions.shape[0]

with tf.Session(graph=graph) as session:
    # `tf.initialize_all_variables` is deprecated in favour of
    # `tf.global_variables_initializer`. Use the new name when this TensorFlow
    # build provides it and fall back to the old one otherwise.
    init_variables = getattr(tf, 'global_variables_initializer', None) or tf.initialize_all_variables
    init_variables().run()
    print("All variable Initialized successfully")

    # Largest usable start offset range for a minibatch. It is derived from the
    # same reshaped array the batches are sliced from, and hoisted out of the
    # loop because it never changes.
    max_offset = training_labels_.shape[0] - batch_size

    # Each iteration performs ONE minibatch update (see the note on `epochs`
    # being a step count rather than a count of full passes over the data).
    for epoch in range(epochs):
        # Pick an offset within the training data, which has been randomized.
        # Note: we could use better randomization across epochs.
        offset = (epoch * batch_size) % max_offset
        # Generate a minibatch.
        batch_data = training_dataset_[offset:(offset + batch_size), :]
        batch_labels = training_labels_[offset:(offset + batch_size), :]
        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        feed_dict = {tf_training_dataset : batch_data, tf_training_labels : batch_labels}
        # Run one optimization step and fetch the minibatch loss (`l`) and predictions.
        _, l, predictions = session.run([optimizer, loss, training_prediction], feed_dict=feed_dict)

        # Calling .eval() on crossvalid_prediction is basically like calling run(), but
        # just to get that one numpy array. Note that it recomputes all its graph
        # dependencies.
        if (epoch % 500 == 0):
            print("Minibatch loss at epoch %d: %f" % (epoch, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(crossvalid_prediction.eval(), crossvalid_labels_))
    # Final evaluation on the held-out test set, once training has finished.
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels_))

All variable Initialized successfully
Minibatch loss at epoch 0: 16.359072
Minibatch accuracy: 9.4%
Validation accuracy: 12.3%
Minibatch loss at epoch 500: 1.298825
Minibatch accuracy: 75.0%
Validation accuracy: 55.3%
Minibatch loss at epoch 1000: 2.221663
Minibatch accuracy: 70.3%
Validation accuracy: 56.9%
Minibatch loss at epoch 1500: 1.521302
Minibatch accuracy: 80.5%
Validation accuracy: 54.3%
Minibatch loss at epoch 2000: 0.984752
Minibatch accuracy: 83.6%
Validation accuracy: 56.9%
Minibatch loss at epoch 2500: 1.171123
Minibatch accuracy: 79.7%
Validation accuracy: 56.8%
Minibatch loss at epoch 3000: 1.612632
Minibatch accuracy: 70.3%
Validation accuracy: 58.7%
Test accuracy: 82.0%


In [None]:
# Note: because of the large learning_rate (0.5), the accuracy degrades after some time. This could mean that the
# descent overshoots the minimum and oscillates around it.