In [None]:
import tensorflow as tf
import os, struct
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
from sklearn.datasets import fetch_mldata

**Load the data**

In [None]:
# Fetch the MNIST digits through scikit-learn's mldata loader; the returned
# object is indexed by 'data' and 'target' in the next cell.
# NOTE(review): fetch_mldata was deprecated and later removed from
# scikit-learn (fetch_openml is the replacement) - confirm the target version.
mnist_dict = fetch_mldata(dataname="MNIST original")

In [None]:
# Unpack the feature matrix and the label vector from the loaded dataset
X_all, Y_all = mnist_dict['data'], mnist_dict['target']
print("Shape of X_all: ", X_all.shape)
print("Shape of Y_all: ", Y_all.shape)

# Derive the problem dimensions from the data itself: the number of columns
# of X_all (28*28 pixels) and the number of distinct labels (10 classes)
num_pixels = X_all.shape[1]
num_classes = len(np.unique(Y_all))
print("Number of classes: ", num_classes)
print("Number of pixels: ", num_pixels)

**Split into training/test/validation**

In [None]:
# Number of observations in each group; every observation left over after the
# training and test cases goes to the validation set
n_training_cases = 60000
n_test_cases = 8000

# Fail early if the dataset cannot accommodate the requested split: without
# this check the slices below would silently produce short or empty groups
n_total_cases = X_all.shape[0]
n_train_plus_test = n_training_cases + n_test_cases
assert n_train_plus_test < n_total_cases, \
    "Requested %d training+test cases but only %d observations are available" % (
        n_train_plus_test, n_total_cases)

# Create a permutation vector. A dedicated, seeded generator makes the split
# identical on every run and leaves NumPy's global random state untouched
split_seed = 0
perm = np.random.RandomState(split_seed).permutation(n_total_cases)

# Cut the permutation once into three disjoint groups of row indices
train_idx = perm[:n_training_cases]
test_idx = perm[n_training_cases:n_train_plus_test]
val_idx = perm[n_train_plus_test:]

# Split into training/test/validation.
# Indexing the labels with None adds a trailing axis, so each Y_* is a column
# of shape (n, 1)
X_train = X_all[train_idx, :].astype(np.float32)
Y_train = Y_all[train_idx, None].astype(np.int32)

X_test = X_all[test_idx, :].astype(np.float32)
Y_test = Y_all[test_idx, None].astype(np.int32)

X_val = X_all[val_idx, :].astype(np.float32)
Y_val = Y_all[val_idx, None].astype(np.int32)

# Every observation must land in exactly one group
assert X_train.shape[0] + X_test.shape[0] + X_val.shape[0] == n_total_cases

print("Shape of X_train: ", X_train.shape)
print("Shape of X_test: ", X_test.shape)
print("Shape of X_val: ", X_val.shape)

**Accuracy function**

In [None]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose highest-scoring class equals the label.

    predictions : 2-D array with one row of class scores per observation
    labels      : integer class ids; singleton axes are squeezed away before
                  the comparison, so a column of shape (n, 1) is accepted
    """
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.squeeze(labels)
    n_hits = np.sum(predicted_classes == true_classes)
    return 100.0 * n_hits / predictions.shape[0]

**Auxiliary TensorFlow functions**

In [None]:
def create_weight(shape):
    """Return a tf.Variable of the given shape, initialised with draws from a
    truncated normal distribution with standard deviation 0.01."""
    initial_values = tf.truncated_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)

def create_bias(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.1."""
    initial_values = tf.constant(0.1, shape=shape)
    return tf.Variable(initial_values)

def conv2d(x, W):
    """Convolve x with the filters W using a stride of 1 along every axis and
    'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool x over 2x2 windows moved in steps of 2, with 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

def next_minibatch(batch_size):
    """Draw a random minibatch from the module-level X_train / Y_train arrays.

    All row indices are shuffled and the first batch_size of them are kept,
    so the rows inside one batch are distinct.

    Returns the tuple (X_batch, Y_batch): the selected rows of X_train and
    the matching rows of Y_train.
    """
    n_cases = X_train.shape[0]
    chosen_rows = np.random.permutation(n_cases)[:batch_size]
    return X_train[chosen_rows, :], Y_train[chosen_rows, :]

def all_computations(X, W_0, b_0, W_2, b_2, W_4, b_4, W_5, b_5):
    """Forward pass of the network; returns the unnormalised class scores.

    Pipeline: conv + ReLU -> 2x2 max-pool -> conv + ReLU -> 2x2 max-pool
              -> flatten -> dense + ReLU -> dense (logits).

    X        : batch of images, passed straight to conv2d
    W_0, b_0 : filters and biases of the first convolution
    W_2, b_2 : filters and biases of the second convolution
    W_4, b_4 : weights and biases of the hidden dense layer
    W_5, b_5 : weights and biases of the output layer

    No softmax is applied here; callers do that themselves.
    """
    Z_0 = tf.nn.relu( conv2d(X, W_0) + b_0 )
    Z_1 = max_pool_2x2(Z_0)
    Z_2 = tf.nn.relu( conv2d(Z_1, W_2) + b_2 )
    Z_3 = max_pool_2x2(Z_2)

    # Number of features per image after the second pooling step. Taken from
    # the static shape of Z_3 so that a different filter count or image size
    # does not require editing this function; the historical 7*7*64 is kept
    # as a fallback for when a dimension is not known at graph-build time.
    flat_size = 1
    for dim in Z_3.get_shape().as_list()[1:]:
        if dim is None:
            flat_size = 7*7*64
            break
        flat_size *= dim

    aux4 = tf.reshape( Z_3, [-1, flat_size] )
    Z_4 = tf.nn.relu( tf.matmul(aux4, W_4) + b_4 )
    x_logits = tf.matmul(Z_4, W_5) + b_5
    return x_logits

In [None]:
def plot_weights_0(W_0):
    """Show every filter of the first convolutional layer as a grayscale image.

    W_0 : 4-D array whose last axis indexes the filters; filter j is
          W_0[:, :, :, j] with singleton axes squeezed away before plotting.

    The filters are laid out on a grid with at most 8 columns and as many
    rows as needed, so 32 filters give the usual 4x8 arrangement while any
    other filter count is handled as well.
    """
    n_filters = W_0.shape[3]
    # At least one column so the row computation below never divides by zero
    n_cols = max(1, min(8, n_filters))
    # Ceiling division: enough rows to hold every filter
    n_rows = (n_filters + n_cols - 1) // n_cols

    plt.figure()
    for j in range(n_filters):
        ax = plt.subplot(n_rows, n_cols, j+1)
        ax.imshow(np.squeeze(W_0[:,:,:,j]), cmap=plt.cm.gray)
        plt.axis('off')
    plt.show()

In [None]:
def _plot_classified(X, predictions, labels, want_correct, n_images):
    """Plot up to n_images inputs that were classified correctly (or not).

    X            : 2-D array, one flattened 28x28 image per row
    predictions  : 2-D array, one row of class probabilities per image
    labels       : true class ids; singleton axes are squeezed away
    want_correct : True selects the correctly classified rows, False the
                   misclassified ones
    n_images     : number of subplot columns and upper bound on the number
                   of images drawn

    Each image is titled with its highest-probability classes (at most 4),
    most likely first.
    """
    predicted = np.argmax(predictions, axis=1)
    actual = np.squeeze(labels)
    mask = (predicted == actual) if want_correct else (predicted != actual)
    idx = np.nonzero(mask)[0]

    # Never index past the rows/classes that actually exist: there may be
    # fewer matching images than requested, or fewer than 4 classes
    n_shown = min(n_images, len(idx))
    n_top = min(4, predictions.shape[1])

    plt.figure()
    for j in range(n_shown):
        n = idx[j]
        # One argsort gives both the ranking and, by indexing, the matching
        # probabilities; reversed so the most likely class comes first
        ranking = np.argsort(predictions[n,:])[::-1][:n_top]
        str_title = ''.join(
            'Digit #'+str(c)+', prob='+str(predictions[n,c])+'\n' for c in ranking)

        ax = plt.subplot(1,n_images,j+1)
        ax.imshow(X[n,:].reshape((28, 28)), cmap=plt.cm.gray)
        plt.title(str_title, fontsize=7)
        plt.axis('off')
    plt.show()

def plot_correct(X, predictions, labels, n_images=5):
    """Plot up to n_images correctly classified images with their top classes."""
    _plot_classified(X, predictions, labels, True, n_images)

def plot_incorrect(X, predictions, labels, n_images=5):
    """Plot up to n_images misclassified images with their top classes."""
    _plot_classified(X, predictions, labels, False, n_images)

**TensorFlow code** (computational graph + session)

In [None]:
# Create the computational graph

# Training hyper-parameters
learning_rate = 0.0001
batch_size = 100
max_iterations = 7000

# Layer sizes: filters in the first and second convolutions, and units in
# the hidden dense layer
K_0 = 32
K_1 = 64
K_4 = 1024

my_graph = tf.Graph()
with my_graph.as_default():
    # (a) Input data
    # The training minibatch is fed at every step; the test and validation
    # sets are embedded in the graph as constants
    tf_train_data = tf.placeholder(tf.float32, shape=(batch_size, num_pixels))
    # One integer class id per example. The trailing comma matters:
    # (batch_size) without it is a plain int, not a 1-tuple
    tf_train_labels = tf.placeholder(tf.int32, shape=(batch_size,))
    tf_test_data = tf.constant(X_test)
    tf_val_data = tf.constant(X_val)

    # Reshape the images to make them (images x height x width x color)
    train_images = tf.reshape(tf_train_data, [-1,28,28,1])
    test_images = tf.reshape(tf_test_data, [-1,28,28,1])
    val_images = tf.reshape(tf_val_data, [-1,28,28,1])

    # (b) Variables
    # Two 5x5 convolutions followed by two dense layers. The dense input size
    # 7*7*K_1 reflects the two stride-2 poolings applied to 28x28 images
    W_0 = create_weight([5, 5, 1, K_0])
    b_0 = create_bias([K_0])
    W_2 = create_weight([5, 5, K_0, K_1])
    b_2 = create_bias([K_1])
    W_4 = create_weight([7 * 7 * K_1, K_4])
    b_4 = create_bias([K_4])
    W_5 = create_weight([K_4, num_classes])
    b_5 = create_bias([num_classes])

    # (c) Computations
    train_logits = all_computations(train_images, W_0, b_0, W_2, b_2, W_4, b_4, W_5, b_5)
    # Pass logits/labels by keyword: the positional order differs between
    # TensorFlow releases and newer ones reject positional arguments outright
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=train_logits,
                                                       labels=tf_train_labels) )

    # (d) Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

    # (e) Other tasks
    # Class probabilities for the three data sets; all of them share the same
    # weights and biases
    train_prediction = tf.nn.softmax( train_logits )
    val_prediction = tf.nn.softmax( all_computations(val_images, W_0, b_0, W_2, b_2, W_4, b_4, W_5, b_5) )
    test_prediction = tf.nn.softmax( all_computations(test_images, W_0, b_0, W_2, b_2, W_4, b_4, W_5, b_5) )

with tf.Session(graph=my_graph) as session:
    # 1. We initialize the weights and biases. This is a one-time operation
    # NOTE(review): later TensorFlow releases deprecate this call in favour of
    # tf.global_variables_initializer - confirm the target version
    tf.initialize_all_variables().run()
    print("Initialized")

    # 2. Run the training loop: one optimizer update per minibatch
    for step in range(max_iterations):
        # Get a new minibatch of data
        X_batch, Y_batch = next_minibatch(batch_size)

        # Prepare a dictionary telling the session where to feed the minibatch.
        # The key of the dictionary is the placeholder node of the graph to be fed,
        # and the value is the numpy array to feed to it.
        # The labels come as a column; take that column explicitly so the
        # result stays 1-D even for a batch of a single example
        feed_dict = { tf_train_data   : X_batch,
                      tf_train_labels : Y_batch[:, 0] }

        # Run the computations
        _, l, predictions = session.run( [optimizer, loss, train_prediction], feed_dict=feed_dict )

        # Every 500 iterations
        if (step % 500 == 0):
            # Print the loss
            print("Minibatch loss at step %d: %f" % (step, l))
            # Obtain and print the accuracy on the training set
            print(" +Minibatch accuracy: %.1f%%" % accuracy(predictions, Y_batch))
            # Obtain and print the accuracy on the validation set
            print(" +Validation accuracy: %.1f%%" % accuracy(val_prediction.eval(), Y_val))

    # 3. Accuracy on the test set
    # Run the forward pass over the test set once and reuse the result for
    # the accuracy and both plots below
    test_predictions = test_prediction.eval()
    print("Test accuracy: %.1f%%" % accuracy(test_predictions, Y_test))

    # 4. Plot correctly and incorrectly classified images
    plot_incorrect(X_test, test_predictions, Y_test)
    plot_correct(X_test, test_predictions, Y_test)

    # 5. Plot the weights of the first hidden layer
    plot_weights_0(W_0.eval())
