In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
#import file_io

%matplotlib inline

In [2]:
# Random seed - current time
seed_time = int((time.time()))
np.random.seed(seed_time)
print "Random Seed", seed_time

with np.load("notMNIST.npz") as data:
    images, labels = data["images"], data["labels"]
images.shape

# Parameters

# Constant parameters
n_input = 784 # Input data dimension
n_classes = 10 # Output classes
training_set_size = 15000 
display_step = 1
n_units = [n_input] # include at least the input 

# Less constant parameters
batch_size = 1000
num_epochs = 200
momentum = 0.99

random_hyperparameters = False

# NN Parameters
isDropout = False
    
learning_rate = 0.1
n_layers = 1
n_hidden1 = 100 # Number of hidden units in layer 1
n_hidden2 = 500 # Number of hidden units in layer 2

# Manual hyperparameters
if random_hyperparameters == False:
    n_units += [n_hidden1]
    if n_layers == 2:
        n_units += [n_hidden2]
    n_units += [n_classes] # include output layer

# Random hyperparameter sampling - note: random_integers(low,high) gives low <= x <= high
elif random_hyperparameters == True:
    #n_units = [n_input]
    n_layers = np.random.randon_integers(1,3) # Random number of layers, between 1 and 3
    for i in range(0, n_layers):
        n_units += [np.random.random_integers(100,500)] # Random number of hidden units per layer
    n_units += [n_classes]
    isDropout = bool(np.random.random_integers(0,1))
    log_learning_rate = np.random.random(-4,-2)
    learning_rate = np.exp(log_learning_rate)
    
# After deciding whether to use dropout
if isDropout == True:
    keep_prob = 0.5
else: 
    keep_prob = 1.0

def rearrangeImages(image_array):
    rearranged = np.zeros((image_array.shape[2], image_array.shape[0] * image_array.shape[1]), dtype=np.float32)
    for num in range(0, image_array.shape[2]):
        flat_image = image_array[:,:,num].flatten()
        max_elem = np.amax(flat_image)
        for i in range(0, len(flat_image)):
            flat_image[i] = flat_image[i] / max_elem
        rearranged[num,:] = flat_image
    return rearranged
                
    
def oneHot(labels, vector_size):
    oneHot = np.zeros((len(labels), vector_size))
    for i in range(0, len(labels)):
        label_index = labels[i]
        oneHot[i, label_index] = 1
    return oneHot

def preprocess_data(images, labels):
    # Create one-hot label vectors and flatten images
    one_hot_labels = oneHot(labels, 10)
    flat_images = rearrangeImages(images)
    return flat_images, one_hot_labels

def neural_network(_X, _weights, _biases): # 1 <= n_layers
    # Hidden units using a ReLU activation function
    hidden_layers = {} 
    hidden_layers_drop = {}
    n_layers = len(_weights) - 1
    #keep_prob = tf.placeholder(tf.float32)
    # Always include at least one layer
    hidden_layers[1] = tf.nn.relu(tf.add(tf.matmul(_X, _weights[1]), _biases[1]))
    hidden_layers_drop[1] = tf.nn.relu(tf.add(tf.matmul(_X, _weights[1]), _biases[1]))
    if (n_layers > 1):
        for i in range(2, n_layers + 1):
            hidden_layers[i] = tf.nn.relu(tf.add(tf.matmul(hidden_layers[i-1], _weights[i]), _biases[i]))
            hidden_layers_drop[i] = tf.nn.dropout(hidden_layers[i], keep_prob)
    return tf.matmul(hidden_layers_drop[n_layers], _weights[len(_weights)]) + _biases[len(_biases)]


Random Seed 1455683100


In [3]:
# Separate training, validation and test sets
flat_images, one_hot_labels = preprocess_data(images, labels)
training_labels = one_hot_labels[0:15000]
validation_labels = one_hot_labels[15000:16000]
testing_labels = one_hot_labels[16000:]

training_images = flat_images[:15000,:]
validation_images = flat_images[15000:16000,:]
testing_images = flat_images[16000:,:]


In [5]:
# Task 2: Neural Network Training

# Store layers' weights & biases
weights = {}
biases = {}
for l in range(1, n_layers + 2):
    # Normalize by the number of input units
    weights[l] = tf.Variable(tf.random_normal([n_units[l-1], n_units[l]], stddev = np.sqrt(1.0/n_units[l-1]) ))
    biases[l] = tf.Variable(tf.random_normal([n_units[l]], stddev = np.sqrt(1.0/n_units[l-1]) ))

# TF Graph Input
input_images = tf.placeholder(tf.float32, shape=(None, n_input))
label = tf.placeholder(tf.float32, shape=[None, n_classes])
pred = tf.placeholder(tf.float32, shape=[None, n_classes])

# Construct model
logits = neural_network(input_images, weights, biases)

# Define cost
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, targets=label)) # Match pred with y

# Make prediction
prediction = tf.nn.softmax(logits)

# Compare prediction with labels to evaluate accuracy
correct_preds = tf.equal(tf.argmax(prediction, 1), tf.argmax(label, 1))
accuracy_rate = tf.reduce_mean(tf.cast(correct_preds, "float"))

In [None]:
def part_2(learning_rate, batch_size, num_epochs):
    train_errors = []
    valid_errors = []
    train_cost = []
    valid_cost = []
    testing_errors = []

    train_op = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)# momentum=momentum)

    # Initializing the variables
    init = tf.initialize_all_variables()

    # Launch the graph
    with tf.Session() as sess:
        sess.run(init)

        # Training cycle
        for epoch in range(num_epochs):
            num_batches = int(training_set_size/batch_size)
            # Loop over all batches
            for i in range(num_batches):
                batch_xs = training_images[i * batch_size: (i + 1) * batch_size]
                batch_ys = training_labels[i * batch_size: (i + 1) * batch_size]
                feed_dict = {input_images : batch_xs, label : batch_ys}
                l, _ = sess.run([cost, train_op], feed_dict=feed_dict)
                
            tc, ta = sess.run([cost, accuracy_rate], feed_dict={input_images: training_images, label: training_labels})
            train_errors.append((1-ta)*batch_size) 
            train_cost.append(-tc) 
            vc, va  = sess.run([cost, accuracy_rate], feed_dict={input_images: validation_images, label: validation_labels})
            valid_errors.append((1-va)*len(validation_labels)) 
            valid_cost.append(-vc)
            
            # Display per epoch 
            if (epoch % display_step) == 0:
                print "Epoch:", '%04d' % (epoch+1)
                print("Minibatch accuracy: %.5f, cost: %.5f " % ((1-train_errors[epoch]/len(validation_labels)), tc))
                print("Validation Accuracy: %.5f, cost: %.5f" % ((1-valid_errors[epoch]/batch_size), vc))
            testing_errors.append( sess.run(accuracy_rate,feed_dict={input_images: testing_images, label: testing_labels}) )
        #print "Training Completed"
        #print("Testing Accuracy: %.5f" % test_accuracy)
    return train_errors, valid_errors, train_cost, valid_cost, testing_errors

In [None]:
ta, va, tc, vc, test = part_2(learning_rate=0.5, batch_size=1000, num_epochs=200)

Epoch: 0001
Minibatch accuracy: 0.55860, cost: 0.29782 
Validation Accuracy: 0.55700, cost: 0.29725
Epoch: 0002
Minibatch accuracy: 0.71767, cost: 0.24518 
Validation Accuracy: 0.71100, cost: 0.24517
Epoch: 0003
Minibatch accuracy: 0.76840, cost: 0.21202 
Validation Accuracy: 0.74600, cost: 0.21245
Epoch: 0004
Minibatch accuracy: 0.79053, cost: 0.18991 
Validation Accuracy: 0.77400, cost: 0.19062
Epoch: 0005
Minibatch accuracy: 0.80347, cost: 0.17421 
Validation Accuracy: 0.79700, cost: 0.17511
Epoch: 0006
Minibatch accuracy: 0.81207, cost: 0.16243 
Validation Accuracy: 0.80300, cost: 0.16346
Epoch: 0007
Minibatch accuracy: 0.81733, cost: 0.15319 
Validation Accuracy: 0.80600, cost: 0.15432
Epoch: 0008
Minibatch accuracy: 0.82347, cost: 0.14573 
Validation Accuracy: 0.81300, cost: 0.14692
Epoch: 0009
Minibatch accuracy: 0.82833, cost: 0.13956 
Validation Accuracy: 0.82000, cost: 0.14081
Epoch: 0010
Minibatch accuracy: 0.83200, cost: 0.13439 
Validation Accuracy: 0.82600, cost: 0.13567


In [None]:
# Create a plot with respect to epoch size and validation error and training error
plt.figure(1,figsize=(10,10))
plt.xlabel("epochs"); plt.ylabel("Training  Error (blue) and Validation Error (red)")
plt.plot(range(1,201), ta,'bs')
plt.plot(range(1,201), va, 'ro')
# Create a plot with respect to epoch size and validation L and training L
plt.figure(2,figsize=(10,10))
plt.xlabel("epochs"); plt.ylabel("Training Log-likelihood (yellow) and Validation Log-likelihood (red)")
plt.plot(range(1,201), tc,'bo')
plt.plot(range(1,201), vc, 'ro')