# Assignment 2
## Exercise 1 + 2 + 3
First we add the imports that we are going to use and define the provided functions of `mnist-helper.py`.

In [214]:
import numpy as np
np.random.seed(1)
import tensorflow as tf
import matplotlib.pyplot as plt
from math import sqrt
from tensorflow import truncated_normal_initializer

In [215]:
class MNIST():
    """Loader for the four MNIST idx files with a fixed train/validation split.

    On construction the training and test images/labels are read from
    ``directory``. One tenth of the training samples is then moved into a
    validation set; the split is drawn with a fixed seed so it is the same
    on every run.
    """

    def __init__(self, directory):
        """Read all four MNIST files from ``directory`` and build the split.

        Args:
            directory: Folder containing the uncompressed idx files
                ``train-images-idx3-ubyte``, ``train-labels-idx1-ubyte``,
                ``t10k-images-idx3-ubyte`` and ``t10k-labels-idx1-ubyte``.

        Raises:
            ValueError: If a file does not carry the expected magic number.
        """
        self._directory = directory

        self._training_data = self._load_binaries("train-images-idx3-ubyte")
        self._training_labels = self._load_binaries("train-labels-idx1-ubyte")
        self._test_data = self._load_binaries("t10k-images-idx3-ubyte")
        self._test_labels = self._load_binaries("t10k-labels-idx1-ubyte")

        # Draw the validation indices from a private generator instead of
        # seeding (and afterwards re-seeding) the global NumPy RNG: a seed
        # set by the caller stays in effect, and RandomState(0) yields the
        # same indices as np.random.seed(0) followed by np.random.choice.
        split_rng = np.random.RandomState(0)
        samples_n = self._training_labels.shape[0]
        random_indices = split_rng.choice(samples_n, samples_n // 10, replace=False)

        self._validation_data = self._training_data[random_indices]
        self._validation_labels = self._training_labels[random_indices]
        self._training_data = np.delete(self._training_data, random_indices, axis=0)
        self._training_labels = np.delete(self._training_labels, random_indices)

    def _load_binaries(self, file_name):
        """Load one idx file from the configured directory.

        Args:
            file_name: Name of the file; it must contain ``"images"`` or
                ``"labels"`` so the matching layout can be chosen.

        Returns:
            A ``uint8`` array of shape ``(items, height, width)`` for image
            files, or a flat ``uint8`` array for label files.

        Raises:
            ValueError: If the file name / magic number combination is not
                one of the two supported layouts.
        """
        # Imported here because the surrounding file does not import them.
        import os
        import struct

        path = os.path.join(self._directory, file_name)

        with open(path, 'rb') as fd:
            # Header: big-endian magic number followed by the item count.
            check, items_n = struct.unpack(">ii", fd.read(8))

            if "images" in file_name and check == 2051:
                height, width = struct.unpack(">II", fd.read(8))
                images = np.fromfile(fd, dtype='uint8')
                return np.reshape(images, (items_n, height, width))
            elif "labels" in file_name and check == 2049:
                return np.fromfile(fd, dtype='uint8')
            else:
                raise ValueError("Not a MNIST file: " + path)

    def get_training_batch(self, batch_size):
        """Yield shuffled ``(data, labels)`` batches of the training set."""
        return self._get_batch(self._training_data, self._training_labels, batch_size)

    def get_validation_batch(self, batch_size):
        """Yield shuffled ``(data, labels)`` batches of the validation set."""
        return self._get_batch(self._validation_data, self._validation_labels, batch_size)

    def get_test_batch(self, batch_size):
        """Yield shuffled ``(data, labels)`` batches of the test set."""
        return self._get_batch(self._test_data, self._test_labels, batch_size)

    def _get_batch(self, data, labels, batch_size):
        """Shuffle ``data``/``labels`` together and yield them batch by batch.

        Args:
            data: Array of samples, indexed along the first axis.
            labels: Array of labels with one entry per sample.
            batch_size: Samples per batch; a value ``<= 0`` means a single
                batch holding every sample.

        Yields:
            ``(data_batch, label_batch)`` tuples of exactly ``batch_size``
            samples. Samples left over after the last full batch are
            dropped.
        """
        samples_n = labels.shape[0]
        if batch_size <= 0:
            batch_size = samples_n

        # A fresh permutation per call, drawn from the global NumPy RNG.
        random_indices = np.random.choice(samples_n, samples_n, replace=False)
        data = data[random_indices]
        labels = labels[random_indices]
        for i in range(samples_n // batch_size):
            on = i * batch_size
            off = on + batch_size
            yield data[on:off], labels[on:off]

    def get_sizes(self):
        """Return ``(training, validation, test)`` sample counts."""
        training_samples_n = self._training_labels.shape[0]
        validation_samples_n = self._validation_labels.shape[0]
        test_samples_n = self._test_labels.shape[0]
        return training_samples_n, validation_samples_n, test_samples_n

## Exercise 4
Here we are inspecting the data.

In [216]:
def plot_mnist_digits(*digits_labels):
    """Show digit images with their labels in a roughly square grid.

    Args:
        *digits_labels: Sequences whose first element is an image accepted
            by ``imshow`` and whose second element is the label used as the
            subplot title.

    Returns:
        The matplotlib figure holding the grid.

    Raises:
        ValueError: If no ``(image, label)`` pair is given.
    """
    num = len(digits_labels)
    if num == 0:
        raise ValueError("plot_mnist_digits needs at least one (image, label) pair")

    rows = int(sqrt(num))
    # Ceiling division: the grid always has room for every digit.
    cols = (num + rows - 1) // rows
    # squeeze=False keeps axarr two-dimensional even for a single row/column.
    f, axarr = plt.subplots(rows, cols, squeeze=False)
    # axarr.flat walks the grid row by row, so index == row * cols + col.
    for index, ax in enumerate(axarr.flat):
        if index >= num:
            # Grid cell without a digit: hide it instead of indexing past
            # the end of digits_labels.
            ax.set_visible(False)
            continue
        ax.imshow(digits_labels[index][0],
                  cmap='gray')
        ax.set_title(digits_labels[index][1], x=-0.1, y=0.5)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    return f

# Instantiate the loader; the MNIST files are read from the working directory.
mnist = MNIST(".")

# Pull out the raw training arrays so a few samples can be looked at.
train_imgs = mnist._training_data
train_labels = mnist._training_labels

# Show the first 20 training digits next to their labels.
f = plot_mnist_digits(
    *((train_imgs[i, ...], train_labels[i]) for i in range(20))
)
plt.show()

## Exercise 5
Create the data flow graph (DFG).

In [225]:
# Build the data flow graph of a single-layer softmax classifier:
# logits y = x W + b, trained with gradient descent on the cross entropy.

# Reset graph in order to cope with multiple cell executions.
tf.reset_default_graph()

# Weight matrix: one column of 28 * 28 weights per output class, initialised
# from a truncated normal distribution with a fixed seed.
mean = 0.0
std = 0.000002
W = tf.get_variable('weights', initializer=truncated_normal_initializer(mean, std, seed=1), shape=[28 * 28, 10])

# Bias vector, one entry per output class, initialised to zero.
b = tf.get_variable('bias', initializer=tf.zeros_initializer(), shape=[10])

# Data vector: a batch of flattened 28 * 28 inputs.
x = tf.placeholder(tf.float32, [None, 28 * 28], name='input')

# Desired output (i.e. the real labels), one integer class per sample.
d = tf.placeholder(tf.int32, [None, 1], name='labels')
# One-hot encoding produces a tensor of shape (batch, 1, 10) instead of
# (batch, 10), so the middle axis is squeezed away.
d_1_hot = tf.squeeze(tf.one_hot(d, 10), axis=1)

# Computed output of the network without activation (the logits).
y = tf.matmul(x, W) + b

# Loss function: softmax cross entropy and its mean over the fed samples.
cross_entropy      = tf.nn.softmax_cross_entropy_with_logits(logits = y, labels = d_1_hot)
mean_cross_entropy = tf.reduce_mean(cross_entropy)
# The optimizer should minimize the cross_entropy.
# NOTE(review): minimize() below is handed `cross_entropy`, not
# `mean_cross_entropy`; presumably the gradient is then taken of the summed
# loss, which would scale the effective step with the batch size — confirm
# this is intended before changing the learning rate.
optimizer          = tf.train.GradientDescentOptimizer(learning_rate = 1e-5)  # 0.5 was apparently tried before
training_step      = optimizer.minimize(cross_entropy)

## Exercise 6 + 7
Now we train the network and store all the results.

In [226]:
# Train the network for a few epochs and record, on every 10th mini-batch,
# the accuracy and mean cross entropy on the full training and test sets
# together with a snapshot of the weights.

# Get the data from our loader.
d_train, l_train, d_test, l_test = (mnist._training_data, mnist._training_labels, mnist._test_data, mnist._test_labels)

# The data comes in image format, which we flatten
d_test  = np.reshape(d_test, (-1, 28 * 28))
d_train = np.reshape(d_train, (-1, 28 * 28))

# The labels only have 1 dimension, we need to blow it up to 2 to match the
# (batch, 1) label placeholder.
l_test  = l_test[:, np.newaxis]
l_train = l_train[:, np.newaxis]

# check if neuron firing strongest coincides with max value position in real labels
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(d_1_hot, 1))
accuracy           = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# record accuracy
training_step_accuracy = []
test_step_accuracy     = []

# record cross-entropy
training_step_entropy = []
test_step_entropy     = []

# record weights
weights = []

# Specify number of epochs and the batch size
n_epochs   = 3
batch_size = 100

# Both metrics are fetched in one run per data set, so the forward pass over
# the full training / test set is computed once instead of twice.
eval_fetches = [accuracy, mean_cross_entropy]
train_feed   = {x: d_train, d: l_train}
test_feed    = {x: d_test, d: l_test}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Counts mini-batches across all epochs.
    i = 0
    for epoch in range(n_epochs):
        print('Epoch %d' % epoch)
        for mb, labels in mnist._get_batch(d_train, l_train, batch_size):
            # NOTE(review): W is fetched in the same run as the update step,
            # so the snapshot may be the value before or after the update —
            # confirm if the distinction matters.
            values = sess.run({'weights': W, 'step': training_step}, feed_dict={x: mb, d: labels})
            if i % 10 == 0:
                current_train_accuracy, current_train_entropy = sess.run(eval_fetches, feed_dict=train_feed)
                current_test_accuracy, current_test_entropy = sess.run(eval_fetches, feed_dict=test_feed)

                training_step_accuracy.append(current_train_accuracy)
                test_step_accuracy.append(current_test_accuracy)
                training_step_entropy.append(current_train_entropy)
                test_step_entropy.append(current_test_entropy)

                # One 28 x 28 weight image per output neuron.
                weights.append(np.reshape(values['weights'], (28, 28, 10)))
            i += 1

### 6.1 Monitor the training process

In [None]:
# Caveat: metrics are only recorded on every 10th step, so the last recorded
# value may come from a few steps before training actually ended.
print('(Almost) final test accuracy: %f' % test_step_accuracy[-1])

# Accuracy on the left, cross entropy on the right; each panel shows the
# test curve first and the training curve second.
f = plt.figure()
ax_acc = f.add_subplot(121)
ax_entropy = f.add_subplot(122)

panels = (
    (ax_acc, 'Accuracy over training and test sets', 'Accuracy',
     test_step_accuracy, training_step_accuracy),
    (ax_entropy, 'Cross Entropy over training and test sets', 'Cross Entropy',
     test_step_entropy, training_step_entropy),
)
for panel_ax, panel_title, panel_ylabel, test_curve, train_curve in panels:
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('(n*10)th batch')
    panel_ax.set_ylabel(panel_ylabel)
    panel_ax.plot(test_curve, linestyle=':', label='Test set')
    panel_ax.plot(train_curve, linestyle=':', label='Training set')
    panel_ax.legend()
plt.show()

# Plot weights interactively: one subplot per output neuron, redrawn for
# every recorded weight snapshot.
rows, cols = (2, 5)
f2, axarr = plt.subplots(rows, cols)
plt.ion()
for row in range(rows):
    for col in range(cols):
        ax = axarr[row][col]
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

for i, current_weights in enumerate(weights):
    # The title only depends on the snapshot, so set it once per snapshot.
    f2.suptitle('Step %d of %d' % (i, len(weights)))
    for row in range(rows):
        for col in range(cols):
            ax = axarr[row][col]
            ax.cla()
            # Row-major position in the grid: the stride is the number of
            # columns, so all 10 neurons are shown exactly once.
            index = row * cols + col
            ax.set_title('Neuron %d' % index)
            # there's many diverging cmaps
            # (https://matplotlib.org/examples/color/colormaps_reference.html)
            ax.imshow(current_weights[..., index], cmap='Spectral')

    # pause so that it always takes 5 seconds
    # Note: The animation seems to slow down linearly, unless we clear the
    # axes (see above).
    # Uncomment this function if you have pyplot > v2.11. Details:
    #     https://stackoverflow.com/questions/46982150/matplotlib-pyplot-global-name-time-is-not-defined-error
    #plt.pause(5 / len(weights)) 