In [52]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [53]:
trainx = pd.read_csv("usps_trainx.data", header=None, delimiter=r"\s+")
trainy = pd.read_csv("usps_trainy.data", header=None, delimiter=r"\s+")
testx = pd.read_csv("usps_testx.data", header=None, delimiter=r"\s+")
testy = pd.read_csv("usps_testy.data", header=None, delimiter=r"\s+")

im_height = 16 #image height
im_width = 16 #image width
num_classes = 10 #number of classes for classificaiton

#normalise the images
trainx = trainx / 256
testx  = testx  / 256

#transform pandas to numpy arrays
trainx = trainx.as_matrix()
trainy = trainy.as_matrix()
testx = testx.as_matrix()
testy = testy.as_matrix()

#remove the second dimensions:
trainy = trainy[:,0]
testy = testy[:,0]

#randomly shuffle the train data
import random
random_idx = random.sample([x for x in range(len(trainx))],len(trainx))
trainx = trainx[random_idx,]
trainy = trainy[random_idx]


In [54]:
# Transform the images in a 16x16 form; the resulting tensor would be 2000x16x16 both for training and for test data.
# Simplified by Dr Seth Flaxman
trainx = trainx.reshape(len(trainx),16,16)
testx = testx.reshape(len(trainx),16,16)

In [55]:
# 1-hot encode the labels trainy and testy
# Simplified by Dr Seth Flaxman
trainy = pd.get_dummies(trainy).as_matrix()
testy = pd.get_dummies(testy).as_matrix()

In [56]:
# Some notation for convenience
train_size = len(trainx)
test_size = len(testx)

In [57]:
# Extract batches funciton that returns a dictionary ready to be fed to the model

def next_batch(data_x,data_y,indices):
    return data_x[indices,:,:] , data_y[indices,:]
    

In [58]:
# Defining the convolution operation (with relu activation), and the maxpool operation
# https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/convolutional_network.ipynb

def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
                          padding='SAME')

In [59]:
# CONSTRUCT THE COMPUTATIONAL GRAPH


batch_size = 5
# an epoch is one run over all the training data
num_epochs = 60  #optimal: 400
learning_rate = 0.001

c = 5 #convolution window size
stride = 2 #stride of the convolution
conv1_size = 64 #number of outputs from the conv layer

c2 = 5 #2nd convolution window size
stride_2 = 2 #stride of the 2nd convolution
conv2_size = 32 #number of outputs from the 2nd conv layer

full_conn_size = 1024 #number of nodes in the fully connected layer

compute_graph = tf.Graph()
# CONSTRUCT THE GRAPH
with compute_graph.as_default():
    
    #introduce placeholders for the data(images and labels) to later feed batches into
    X = tf.placeholder(tf.float32, shape=(batch_size,im_height,im_width)) #images
    y = tf.placeholder(tf.int32, shape=(batch_size,num_classes)) #labels
    
    #transform input into a 4d tensor to include the colour channels
    X_prime = tf.reshape(X, shape=[-1, im_height, im_width, 1])
    
    #define the parameters(variables) of the model
    #for the convolution layer:
    W1 = tf.Variable(tf.random_normal([c, c, 1, conv1_size]))
    b1 = tf.Variable(tf.random_normal([conv1_size]))
    #for the second convolution layer:
    W2 = tf.Variable(tf.random_normal([c2, c2, conv1_size, conv2_size]))
    b2 = tf.Variable(tf.random_normal([conv2_size]))
    #for the fully connected layer:
    W3 = tf.Variable(tf.random_normal([1*1*conv2_size, full_conn_size]))
    b3 = tf.Variable(tf.random_normal([full_conn_size]))
    #for the output:
    WO = tf.Variable(tf.random_normal([full_conn_size, num_classes]))
    bO = tf.Variable(tf.random_normal([num_classes]))
    
    
    #define the model structure
    # c x c convolution, 1 input, conv1_size outputs
    convolution = conv2d(X_prime, W1, b1, stride)
    # then apply pooling
    pool = maxpool2d(convolution)
    
    #one more convolution+pooling layer:
    convolution2 = conv2d(pool, W2, b2, stride_2)
    # then apply pooling
    pool2 = maxpool2d(convolution2)
    
    
    #add a fully connected layer
    # Reshape convolution output to fit the fully connected layer input
    full_conn = tf.reshape(pool2, [-1, 1*1*conv2_size])
    full_conn = tf.add(tf.matmul(full_conn, W3), b3)
    full_conn = tf.nn.relu(full_conn)
    

    # Output, class prediction
    predictions = tf.add(tf.matmul(full_conn, WO), bO)
    
    # Define loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    # Evaluate model
    # https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/convolutional_network.ipynb
    correct_pred = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
""""   
    # Feed Dict and RUN SESSION
    num_batches = int(len(trainx)/batch_size)

    sess = tf.Session()
    tf.global_variables_initializer().run(session=sess)
    # Feed the batches one by one
    for t in range(num_epochs):
        for i in range(num_batches):
            batch_X, batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])
            sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
"""

#    for _ in range(1000):
#        batch_xs, batch_ys = mnist.train.next_batch(100)
#        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})


'"   \n    # Feed Dict and RUN SESSION\n    num_batches = int(len(trainx)/batch_size)\n\n    sess = tf.Session()\n    tf.global_variables_initializer().run(session=sess)\n    # Feed the batches one by one\n    for t in range(num_epochs):\n        for i in range(num_batches):\n            batch_X, batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])\n            sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})\n'

In [60]:
# TUNE HYPERPARAMETERS
# Note: this can take a while to run
# Extract (from the test set) a small validation set to test the model accuracy while tuning the hyperparameters

valid_size = 200

trainx_ = trainx[0:train_size-valid_size]
trainy_ = trainy[0:train_size-valid_size]
validx = trainx[train_size-valid_size:]
validy = trainy[train_size-valid_size:]

num_train_batches = int(len(trainx_)/batch_size)
train_accuracy = np.zeros(num_train_batches)

num_valid_batches = int(len(validx)/batch_size)
valid_accuracy = np.zeros(num_valid_batches)


# Initialise all variables and run the session:
with tf.Session(graph=compute_graph) as sess:
    tf.initialize_all_variables().run(session=sess)
    
    # Feed the batches one by one
    for t in range(num_epochs):
        for i in range(num_train_batches):
            batch_X, batch_y = next_batch(trainx_,trainy_,[x for x in range(train_size-valid_size)][i*batch_size:(i+1)*batch_size])
            sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
            train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
        
        # report train/valid accuracy regularly in order to perform early stoping
        if((t+1) % 100 == 0 or ((t+1) % 5 == 0 and (t+1) <= 100) or (t+1) <= 10):
            print("Train accuracy at epoch", t+1, "is:", sum(train_accuracy)/num_train_batches)
            # validation accuracy
            for i in range(num_valid_batches):
                batch_X, batch_y = next_batch(validx,validy,[x for x in range(valid_size)][i*batch_size:(i+1)*batch_size])
                sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
                valid_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
            print("Valid accuracy at epoch", t+1, "is:", sum(valid_accuracy)/num_valid_batches)


Instructions for updating:
Use `tf.global_variables_initializer` instead.
Train accuracy at epoch 1 is: 0.340555562948
Valid accuracy at epoch 1 is: 0.560000011325
Train accuracy at epoch 2 is: 0.656111123164
Valid accuracy at epoch 2 is: 0.715000014007
Train accuracy at epoch 3 is: 0.776666676584
Valid accuracy at epoch 3 is: 0.755000011995
Train accuracy at epoch 4 is: 0.834444452822
Valid accuracy at epoch 4 is: 0.805000010878
Train accuracy at epoch 5 is: 0.880555562013
Valid accuracy at epoch 5 is: 0.830000008643
Train accuracy at epoch 6 is: 0.908888893988
Valid accuracy at epoch 6 is: 0.8600000076
Train accuracy at epoch 7 is: 0.932222226097
Valid accuracy at epoch 7 is: 0.800000010431
Train accuracy at epoch 8 is: 0.935000003874
Valid accuracy at epoch 8 is: 0.880000006407
Train accuracy at epoch 9 is: 0.955000002682
Valid accuracy at epoch 9 is: 0.850000008196
Train accuracy at epoch 10 is: 0.958888891132
Valid accuracy at epoch 10 is: 0.840000008792
Train accuracy at epoch 15

In [46]:
# TUNE HYPERPARAMETERS
# Do cross-validation to check the model performance and tune the parameters (num layers, layer types, batch size, etc.)
# Note: it's not wise to use the test set in hyperparameter tuning

#num_cross_valid = 10 #number of cross-validation sectors
#train_accuracy = np.zeros(num_cross_valid)

# Train the model on each 9 of the 10 train data sectors, while recording the test accuracy on the remaining segment
#for i in range(num_cross_valid):
#    train_batch_X, train_batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])
#    sess.run(optimizer, feed_dict={X: train_batch_X, y: train_batch_y})
#    test_batch_X, test_batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])
#    train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})

In [47]:
len(trainx_)

1800

In [61]:
# NOW TRAIN THE MODEL OVER THE WHOLE DATASET AND REPORT TEST ACCURACY

num_batches = int(train_size/batch_size)
train_accuracy = np.zeros(num_batches)

num_test_batches = int(test_size/batch_size)
accuracy_ = np.zeros(num_test_batches)

# Initialise all variables and run the session
with tf.Session(graph=compute_graph) as sess:
    tf.initialize_all_variables().run(session=sess)

    # Feed the batches one by one
    for t in range(num_epochs):
        for i in range(num_batches):
            batch_X, batch_y = next_batch(trainx,trainy,[x for x in range(train_size)][i*batch_size:(i+1)*batch_size])
            sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
            train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
        if((t+1) % 100 == 0 or ((t+1) % 10 == 0 and (t+1) <= 100) or (t+1) <= 10):
            print("The train accuracy at epoch", t+1, "is:", sum(train_accuracy)/num_batches)
            
    
        
    
    for i in range(num_test_batches):
        batch_X, batch_y = next_batch(testx,testy,[x for x in range(len(testx))][i*batch_size:(i+1)*batch_size])
        accuracy_[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
        
    print("The test accuracy is ",sum(accuracy_)/num_test_batches)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
The train accuracy at epoch 1 is: 0.427500009723
The train accuracy at epoch 2 is: 0.690500013158
The train accuracy at epoch 3 is: 0.798500010185
The train accuracy at epoch 4 is: 0.853500008211
The train accuracy at epoch 5 is: 0.889000006318
The train accuracy at epoch 6 is: 0.919000004753
The train accuracy at epoch 7 is: 0.931500004008
The train accuracy at epoch 8 is: 0.94950000301
The train accuracy at epoch 9 is: 0.957500002533
The train accuracy at epoch 10 is: 0.967500001937
The train accuracy at epoch 20 is: 0.992000000477
The train accuracy at epoch 30 is: 0.993500000387
The train accuracy at epoch 40 is: 1.0
The train accuracy at epoch 50 is: 1.0
The train accuracy at epoch 60 is: 1.0
The test accuracy is  0.935500003472
