In [5]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import datetime

%matplotlib inline
# ECE521 Assignment 2
# Shafquat Arefeen
# Ahmed Faraz Khan 998822779

In [10]:
# Random seed - current time
seed_time = int((datetime.datetime.now()-datetime.datetime(1970,1,1)).total_seconds())
np.random.seed(seed_time)
print "Random Seed", seed_time

with np.load("notMNIST.npz") as data:
    images, labels = data["images"], data["labels"]
images.shape

# Parameters

# Constant parameters
n_input = 784 # Input data dimension
n_classes = 10 # Output classes
training_set_size = 15000 
dropout_rate = 0.5
display_step = 1
n_units = [n_input] # include at least the input 

# Less constant parameters
batch_size = 1000
num_epochs = 100
momentum = 0.99

random_hyperparameters = False

# NN Parameters
learning_rate = 0.1
n_layers = 1
n_hidden1 = 1000 # Number of hidden units in layer 1
n_hidden2 = 500 # Number of hidden units in layer 2


# Manual hyperparameters
if random_hyperparameters == False:
    n_units += [n_hidden1]
    if n_layers == 2:
        n_units += [n_hidden2]
    n_units += [n_classes] # include output layer

# Random hyperparameter sampling - note: random_integers(low,high) gives low <= x <= high
elif random_hyperparameters == True:
    #n_units = [n_input]
    n_layers = np.random.randon_integers(1,3) # Random number of layers, between 1 and 3
    for i in range(0, n_layers):
        n_units += [np.random.random_integers(100,500)] # Random number of hidden units per layer
    n_units += [n_classes]
    isDropout = bool(np.random.random_integers(0,1))
    log_learning_rate = np.random.random_integers(-4,-2)
    learning_rate = np.exp(log_learning_rate)


def rearrangeImages(image_array):
    rearranged = np.zeros((image_array.shape[2], image_array.shape[0] * image_array.shape[1]), dtype=np.float32)
    for num in range(0, image_array.shape[2]):
        flat_image = image_array[:,:,num].flatten()
        max_elem = np.amax(flat_image)
        for i in range(0, len(flat_image)):
            flat_image[i] = flat_image[i] / max_elem
        rearranged[num,:] = flat_image
    return rearranged
                
    
def oneHot(labels, vector_size):
    oneHot = np.zeros((len(labels), vector_size))
    for i in range(0, len(labels)):
        label_index = labels[i]
        oneHot[i, label_index] = 1
    return oneHot

def preprocess_data(images, labels):
    # Create one-hot label vectors and flatten images
    one_hot_labels = oneHot(labels, 10)
    flat_images = rearrangeImages(images)
    return flat_images, one_hot_labels

def neural_network(_X, _weights, _biases, n_layers): # 1 <= n_layers
    # Hidden units using a ReLU activation function
    hidden_layers = {} 
    hidden_layers_drop = {}
    
    # Always include at least one layer
    hidden_layers[1] = tf.nn.relu(tf.add(tf.matmul(_X, _weights[1]), _biases[1]))
    if (n_layers > 1):
        for i in range(2, n_layers + 1):
            hidden_layers[i] = tf.nn.relu(tf.add(tf.matmul(hidden_layers[i-1], _weights[i]), _biases[i]))
            keep_prob = tf.placeholder(tf.float32)
            hidden_layers_drop[i] = tf.nn.dropout(hidden_layers[i], keep_prob)
    return tf.matmul(hidden_layers_drop[n_layers], _weights[n_layers + 1]) + _biases[n_layers + 1]
    #if (n_layers == 1):
        #return tf.matmul(hidden_layer1, _weights['out']) + _biases['out']
    #elif (n_layers == 2):
        #hidden_layer2 = tf.nn.relu(tf.add(tf.matmul(hidden_layer1, _weights['h2']), _biases['b2']))
        
    

1455575979


In [11]:
# Separate training, validation and test sets
flat_images, one_hot_labels = preprocess_data(images, labels)
training_labels = one_hot_labels[0:15000]
validation_labels = one_hot_labels[15000:16000]
testing_labels = one_hot_labels[16000:]

training_images = flat_images[:15000,:]
validation_images = flat_images[15000:16000,:]
testing_images = flat_images[16000:,:]


# Placeholder Inputs
X = tf.placeholder("float32", shape=(None, 28*28)) # 28x28 flattened arrays
Y = tf.placeholder("float32", shape=(None, 10)) # 10-class classifier

# Variables
W = tf.Variable(np.random.randn(28*28, 10).astype("float32"), name="weight")
b = tf.Variable(np.random.randn(10).astype("float32"), name="bias")

logits = tf.add(tf.matmul(X, W), b)
output = tf.nn.softmax(logits)


cost_batch = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, targets=Y)
cost = tf.reduce_mean(cost_batch)

# Calculate cost (cross-entropy)
#Y_ = tf.placeholder(tf.float32, shape=(None, 10))
#cross_entropy = -tf.reduce_sum(Y_*tf.log(Y))

# Gradient descent
train_op = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum).minimize(cost)

# Prediction for multiple outputs
#pred = tf.greater(output, 0.5)
#pred_float = tf.cast(pred, "float")

# Accuracy
correct_prediction = tf.equal(tf.argmax(Y,1), tf.argmax(output,1))#tf.argmax(Y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
norm_w = tf.nn.l2_loss(W)

sess = tf.InteractiveSession()

init = tf.initialize_all_variables()
sess.run(init)

for epoch in range(num_epochs):
  for i in xrange(int(FLAGS.training_set_size/batch_size)):
    x_batch = training_images[i * batch_size: (i + 1) * batch_size]
    y_batch = training_labels[i * batch_size: (i + 1) * batch_size]
    cost_np, _ = sess.run([cost, train_op],
                          feed_dict={X: x_batch, Y: y_batch})
    
    #Display logs per epoch step
  if (epoch % display_step) == 0:
    cost_train, accuracy_train = sess.run([cost, accuracy],feed_dict={X: training_images, Y: training_labels})
    cost_eval, accuracy_eval, norm_w_np = sess.run([cost, accuracy, norm_w],
                                                   feed_dict={X: validation_images, Y: validation_labels})    
    print ("Epoch:%04d, cost=%0.9f, Train Accuracy=%0.4f, Eval Accuracy=%0.4f,    Norm of Weights=%0.4f" %
           (epoch+1, cost_train, accuracy_train, accuracy_eval, norm_w_np))

In [13]:
# Task 2: Neural Network Training

# tf Graph input
#x = tf.placeholder("float", [None, n_input])
#y = tf.placeholder("float", [None, n_classes])

tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, n_input))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, n_classes))
tf_valid_dataset = tf.constant(validation_images, dtype=tf.float32, shape = validation_images.shape)
tf_test_dataset = tf.constant(testing_images, dtype=tf.float32, shape = testing_images.shape)

pred = tf.placeholder(tf.float32, shape=[None, n_classes])
label = tf.placeholder(tf.float32, shape=[None, n_classes])

# Store layers' weights & biases
weights = {}
biases = {}
for l in range(1, n_layers + 2):
    # Normalize by the number of input units
    weights[l] = tf.Variable(tf.random_normal([n_units[l-1], n_units[l]], stddev = np.sqrt(1.0/n_units[l-1]) ))
    biases[l] = tf.Variable(tf.random_normal([n_units[l]], stddev = np.sqrt(1.0/n_units[l-1]) ))

# Construct model
logits = neural_network(tf_train_dataset, weights, biases, n_layers)

# Define cost and optimizer
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, targets=tf_train_labels)) # Match pred with y
train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)# momentum=momentum)
#train_op = optimizer.minimize(cost)

# Make prediction
train_prediction = tf.nn.softmax(logits)
valid_prediction = tf.nn.softmax(neural_network(tf_valid_dataset, weights, biases, n_layers))
test_prediction = tf.nn.softmax(neural_network(tf_test_dataset, weights, biases, n_layers))

# Compare prediction with labels to evaluate accuracy
correct_preds = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
accuracy_rate = tf.reduce_mean(tf.cast(correct_preds, "float"))
    
# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(num_epochs):
        avg_cost = 0.
        num_batches = int(training_set_size/batch_size)
        # Loop over all batches
        for i in range(num_batches):
            batch_xs = training_images[i * batch_size: (i + 1) * batch_size]
            batch_ys = training_labels[i * batch_size: (i + 1) * batch_size]
            feed_dict = {tf_train_dataset : batch_xs, tf_train_labels : batch_ys, keep_prob = (1.0-dropout_rate)}
            _, l, train_preds = sess.run([train_op, cost, train_prediction], feed_dict=feed_dict)
        
        # Display per epoch 
        if (epoch % display_step) == 0:
            train_accuracy, train_hits = sess.run([accuracy_rate, correct_preds], feed_dict={pred: train_preds, label: batch_ys, keep_prob = 1.0})
            valid_accuracy, valid_hits  = sess.run([accuracy_rate, correct_preds], feed_dict={pred: valid_prediction.eval(), label: validation_labels, keep_prob: 1.0})
            print "Epoch:", '%04d' % (epoch+1)
            print("Minibatch accuracy: %.5f" % train_accuracy)
            print("Validation Accuracy: %.5f" % valid_accuracy)
    test_accuracy, test_hits  = sess.run([accuracy_rate, correct_preds], feed_dict={pred: test_prediction.eval(), label: testing_labels, keep_prob: 1.0})
    print "Training Completed"
    print("Testing Accuracy: %.5f" % test_accuracy)

Epoch: 0001
Minibatch accuracy: 0.29800
Validation Accuracy: 0.29900
Epoch: 0002
Minibatch accuracy: 0.35900
Validation Accuracy: 0.38800
Epoch: 0003
Minibatch accuracy: 0.44400
Validation Accuracy: 0.46900
Epoch: 0004
Minibatch accuracy: 0.53600
Validation Accuracy: 0.54000
Epoch: 0005
Minibatch accuracy: 0.60600
Validation Accuracy: 0.59600
Epoch: 0006
Minibatch accuracy: 0.65500
Validation Accuracy: 0.65300
Epoch: 0007
Minibatch accuracy: 0.69900
Validation Accuracy: 0.67900
Epoch: 0008
Minibatch accuracy: 0.73100
Validation Accuracy: 0.69200
Epoch: 0009
Minibatch accuracy: 0.75000
Validation Accuracy: 0.71200
Epoch: 0010
Minibatch accuracy: 0.76500
Validation Accuracy: 0.72700
Epoch: 0011
Minibatch accuracy: 0.77800
Validation Accuracy: 0.73400
Epoch: 0012
Minibatch accuracy: 0.78700
Validation Accuracy: 0.74500
Epoch: 0013
Minibatch accuracy: 0.79100
Validation Accuracy: 0.75700
Epoch: 0014
Minibatch accuracy: 0.79800
Validation Accuracy: 0.76200
Epoch: 0015
Minibatch accuracy: 0.

KeyboardInterrupt: 

f = open('NN - learning rate = %f.txt' % learning_rate, 'a')
f.write(training_accuracies)
f.write('\n')
f.write(validation_accuracies)
f.close()

In [None]:
batch_ys[0]

In [None]:
# Task 1:
# Task 2: 5 learning rates
# Task 3: 100, 500 and 1000 hidden units
# Task 4: 2 layer network (500 hidden units each)
# Task 5: Dropout
# Task 6: Hyperparameter Optimization