# Competition Assignment


## Import all libraries

In [5]:
import numpy as np
import tensorflow as tf
import os
from svhn_helper import SVHN
from matplotlib import pyplot as plt

Note that it is necessary for the training data to reside in this notebook's directory.

## Investigate the data

In [6]:
def print_statistics():
    '''Print the relative label frequencies of the training and validation set.

    The label arrays are read from the module-level ``svhn`` object. Each
    array is binned with NumPy's default 10-bin histogram and the counts are
    divided by the number of labels, so the printed values are fractions in
    [0, 1]. One row is printed per bin; rows are numbered starting at 1.
    '''
    print('Percentage of labels in train and validation set')

    def label_fractions(labels):
        # histogram counts normalised by the total number of labels
        counts, _ = np.histogram(labels)
        return counts / len(labels)

    train_fractions = label_fractions(svhn._training_labels)
    validation_fractions = label_fractions(svhn._validation_labels)

    print('%15s | %10s' % ('train', 'validation'))
    print(' ' * 5 + '-' * 23)
    rows = zip(train_fractions, validation_fractions)
    for row_number, (train_share, validation_share) in enumerate(rows, start=1):
        print('%5d%10f | %10f' % (row_number, train_share, validation_share))

def plot_mispredictions(model, filename, data, labels):
    '''Restore a checkpoint and plot samples the model classifies incorrectly.

    Prints the number and percentage of misclassified samples and plots up to
    100 of them, each titled with the label the model *predicted*.

    Parameters
    ----------
    model       :   BaseModel
                    Model whose graph has already been built; only its
                    ``predict`` method is used
    filename    :   str
                    Checkpoint path handed to ``tf.train.Saver.restore``
    data        :   np.ndarray
                    Input samples; must fit the model's input placeholder
    labels      :   np.ndarray
                    True labels, one per sample
    '''
    # ``with tf.Session()`` closes the session on exit; the previous
    # ``tf.Session().as_default()`` only installed it as default and leaked it.
    with tf.Session() as session:
        saver = tf.train.Saver()
        saver.restore(session, filename)
        predictions = model.predict(session, data)

    # flat indices of all samples whose prediction differs from the label
    wrong = np.argwhere(labels != predictions)[:, 0]
    print(f'Number of misclassifications: {len(wrong):d}')
    if len(labels) > 0:
        print(f'Percent: {len(wrong)/len(labels)*100:f}')

    if len(wrong) == 0:
        # nothing to show; an empty figure grid cannot be created
        return

    max_plots = 10**2
    shown = wrong[:max_plots]
    plot(list(zip(data[shown], predictions[shown])))

def plot(data):
    '''Show a list of labelled images in a roughly square grid.

    Parameters
    ----------
    data    :   list of (image, label) pairs
                Every image must be reshapeable to 32x32; every label is used
                as an index into the category names below.
    '''
    n_samples = len(data)
    if n_samples == 0:
        # plt.subplots cannot build a 0x0 grid
        return

    # Index 10 also maps to '0' -- presumably because the dataset encodes the
    # digit zero as class 10; confirm against svhn_helper.
    categories = ['0','1','2','3','4','5','6','7','8','9','0']

    # Smallest near-square grid with at least n_samples cells.
    n_cols = int(np.ceil(np.sqrt(n_samples)))
    n_rows = int(np.ceil(n_samples / n_cols))

    # squeeze=False keeps axarr two-dimensional even for a single row/column
    _, axarr = plt.subplots(n_rows, n_cols, squeeze=False)
    for index, ax in enumerate(axarr.flat):
        if index >= n_samples:
            # surplus cell in the last row
            ax.axis('off')
            continue

        img, label = data[index]
        ax.set_title(categories[int(label)])
        ax.imshow(np.reshape(img, (32, 32)), cmap='gray')
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    plt.show()

def main():
    '''Print label statistics and plot 100 random training/validation samples.

    Reads the dataset from the module-level ``svhn`` object. Indices are drawn
    without replacement, first for the training and then for the validation
    set.
    '''
    sample_size = 100
    n_train, n_val = svhn.get_sizes()[:2]
    train_indices = np.random.choice(n_train, sample_size, replace=False)
    val_indices = np.random.choice(n_val, sample_size, replace=False)

    def pair_up(images, labels, indices):
        # list of (image, label) tuples for the selected samples
        return list(zip(images[indices], labels[indices]))

    train_batch = pair_up(svhn._training_data, svhn._training_labels,
                          train_indices)
    val_batch = pair_up(svhn._validation_data, svhn._validation_labels,
                        val_indices)

    print_statistics()
    plot(train_batch)
    plot(val_batch)

# Entry point. ``svhn`` is bound at module level on purpose: print_statistics()
# and main() look it up as a global instead of receiving it as an argument.
if __name__ == "__main__":
    svhn = SVHN()
    main()



## Model Helper
To simplify training and model handling, we define a few helper classes and functions.
### Model Class
This class acts as an abstract base class for our network models.

In [7]:
class BaseModel(object):
    '''Abstract base class for network models.

    Subclasses build their graph in ``__init__`` and must override
    ``run_training_step``, ``get_accuracy`` and ``predict``; the versions
    defined here raise ``NotImplementedError`` instead of silently returning
    ``None``.

    Attributes
    ----------
    opt     :   tf.train.Optimizer
                TF optimizer to use
    act_fn  :   function
                tf.nn function for neuron activation
    '''

    def __init__(self, optimizer, activation):
        '''Init new model

        Parameters
        ----------
        optimizer   :   tf.train.Optimizer
                        TF optimizer to use
        activation  :   function
                        tf.nn function for neuron activation
        '''
        self.opt = optimizer
        self.act_fn = activation

    def run_training_step(self, session, data, labels):
        '''Run forward pass through net and apply gradients once.

        Parameters
        ----------
        session :   tf.Session
                    Session to use for executing everything
        data    :   np.ndarray
                    Input data
        labels  :   np.ndarray
                    Input labels

        Raises
        ------
        NotImplementedError
            Always; subclasses must override this method.
        '''
        raise NotImplementedError('run_training_step must be overridden')

    def get_accuracy(self, session, data, labels):
        '''Run forward pass through net and compute accuracy.

        Parameters
        ----------
        session :   tf.Session
                    Session to use for executing everything
        data    :   np.ndarray
                    Input data
        labels  :   np.ndarray
                    Input labels

        Returns
        -------
        float

        Raises
        ------
        NotImplementedError
            Always; subclasses must override this method.
        '''
        raise NotImplementedError('get_accuracy must be overridden')

    def predict(self, session, data):
        '''Get model predictions for data.

        Parameters
        ----------
        session :   tf.Session
        data    :   np.ndarray
                    Must fit the model's input placeholder

        Returns
        -------
        np.ndarray
            Array of which output neuron is most active for each input

        Raises
        ------
        NotImplementedError
            Always; subclasses must override this method.
        '''
        raise NotImplementedError('predict must be overridden')


### Utility Functions

In [8]:
import sys
import argparse
import importlib
import tensorflow as tf
import numpy as np

# Number of normalisation layers created so far; used to number their scopes.
norm_n = 0


def batch_norm_layer(input):
    '''Create a layer that normalizes the batch with its mean and variance.

    Mean and variance are computed per channel over the batch and both
    spatial axes; no learned offset or scale is applied.

    Parameters
    ----------
    input   :   tf.Tensor
                Layer input. The reduction axes assume a 4D
                (batch, height, width, channels) tensor, which is how this
                helper is used on convolution outputs in this file.

    Returns
    -------
    tf.Tensor
            The normalized tensor, same shape as ``input``
    '''
    global norm_n
    norm_n += 1
    with tf.variable_scope('norm%d' % norm_n):
        # Reduce over batch, height and width. The previous axes=[0, 1, 1]
        # listed the height axis twice and never reduced over the width.
        mean, var = tf.nn.moments(input, axes=[0, 1, 2])
        return tf.nn.batch_normalization(
            input, mean, var, offset=0, scale=1, variance_epsilon=1e-10)

# Number of max-pooling layers created so far; used to number their scopes.
pool_n = 0


def max_pool_layer(input, ksize, strides):
    '''Create a max-pooling layer with ``SAME`` padding in its own scope.

    Parameters
    ----------
    input   :   tf.Tensor
                Input to the layer
    ksize   :   tuple or list
                Pooling window size, passed on to ``tf.nn.max_pool``
    strides :   tuple or list
                Window strides, passed on to ``tf.nn.max_pool``

    Returns
    -------
    tf.Tensor
            The pooled tensor
    '''
    global pool_n
    pool_n = pool_n + 1
    scope_name = 'pool%d' % pool_n
    with tf.variable_scope(scope_name):
        pooled = tf.nn.max_pool(
            input, ksize=ksize, strides=strides, padding='SAME')
    return pooled

# Number of convolutional layers created so far; used to number their scopes.
conv_n = 0


def conv_layer(input, kshape, strides=(1, 1, 1, 1), activation=tf.nn.tanh):
    '''Create a convolutional layer with ``SAME`` padding.

    Kernels and biases are initialised from a truncated normal distribution
    with a stddev of 0.1.

    Parameters
    ----------
    input       :   tf.Tensor
                    Input to the layer
    kshape      :   tuple or list
                    Shape of the kernel tensor; its last entry is the number
                    of output channels and also the number of biases
    strides     :   tuple or list
                    Strides
    activation  :   function
                    Non-linearity applied to ``conv + bias``; defaults to
                    ``tf.nn.tanh``

    Returns
    -------
    tf.Tensor
            The layer activation, ``activation(conv + bias)``

    '''
    global conv_n
    conv_n += 1
    # this adds a prefix to all variable names
    with tf.variable_scope('conv%d' % conv_n):
        # kshape used to be passed a second time as tf.Variable's second
        # positional argument, which is ``trainable`` and not a shape.
        kernels = tf.Variable(
            tf.truncated_normal(kshape, stddev=0.1),
            name='kernels')
        bias_shape = (kshape[-1],)
        biases = tf.Variable(
            tf.truncated_normal(bias_shape, stddev=0.1),
            name='bias')
        conv = tf.nn.conv2d(
            input,
            kernels,
            strides,
            padding='SAME',
            name='conv')
        # Apply the requested activation exactly once; previously it was
        # stacked on top of a hard-coded tanh.
        return activation(conv + biases)


# Number of fully connected layers created so far; used to number their scopes.
fc_n = 0


def fully_connected(input, n_out, with_activation=False, activation=tf.nn.tanh):
    '''Create a fully connected layer.

    Weights and bias are initialised from a truncated normal distribution
    with a stddev of 0.1.

    Parameters
    ----------
    input           :   tf.Tensor
                        Input to the layer; its last dimension is the number
                        of input features
    n_out           :   int
                        Number of neurons in the layer
    with_activation :   bool
                        Return the activation instead of the drive. Leave it
                        off when the result is fed to a loss that expects
                        unscaled logits, such as
                        ``softmax_cross_entropy_with_logits``.
    activation      :   function
                        Non-linearity used when ``with_activation`` is set;
                        defaults to ``tf.nn.tanh``

    Returns
    -------
    tf.Tensor
            ``activation(input * weights + bias)`` if ``with_activation`` is
            set, otherwise the drive ``input * weights + bias``
    '''
    global fc_n
    fc_n = fc_n + 1
    scope_name = 'fully%d' % fc_n
    with tf.variable_scope(scope_name):
        initializer = tf.truncated_normal_initializer(stddev=0.1)
        # the last dim of the input is the 1st dim of the weights
        n_in = input.shape[-1]
        weights = tf.get_variable(
            'weights',
            shape=(n_in, n_out),
            dtype=tf.float32,
            initializer=initializer)
        bias = tf.get_variable('bias', shape=(n_out,), initializer=initializer)
        drive = tf.matmul(input, weights) + bias
        return activation(drive) if with_activation else drive



# Number of weighted pooling layers created so far; used to number their scopes.
weighted_pool_n = 0


def weighted_pool_layer(input_layer, ksize, strides=(1, 1, 1, 1)):
    '''Pool with a learned blend of max and average pooling.

    A single trainable scalar ``a`` is created per layer and the output is
    ``a * max_pool + (1 - a) * avg_pool``. Both poolings use ``SAME`` padding.

    Parameters
    ----------
    input_layer :   tf.Tensor
                    4D tensor
    ksize       :   tuple or list
                    Pooling window size
    strides     :   tuple or list
                    Window strides

    Returns
    -------
    tf.Tensor
           The 4D tensor after being pooled
    '''
    global weighted_pool_n
    weighted_pool_n = weighted_pool_n + 1
    scope_name = 'weight_pool%d' % weighted_pool_n
    with tf.variable_scope(scope_name):
        # learned mixing coefficient between the two pooling results
        mix = tf.get_variable(
            'a',
            shape=(1,),
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(),
            trainable=True)
        pooled_max = tf.nn.max_pool(input_layer, ksize, strides, padding='SAME')
        pooled_avg = tf.nn.avg_pool(input_layer, ksize, strides, padding='SAME')
        return (mix * pooled_max + (1 - mix) * pooled_avg)

This class can be used to train the model on a set of parameters.

In [9]:
class ParameterTest(object):
    '''Test one set of parameters to the train() function.

    Attributes
    ----------
    model           :   BaseModel
                        Model to train; ``model.opt.get_name()`` and
                        ``model.act_fn.__name__`` are used for naming
    batch_size      :   int
    epochs          :   int
    train_function  :   function
                        Called as ``train_function(model, batch_size, epochs,
                        save_fname, return_records=False, record_step=30,
                        ignore_saved=...)``; its return value is stored as
                        ``accuracy``
    learning_rate   :   float
                        Only used for naming and reporting
    ignore_saved    :   bool
                        Forwarded to ``train_function``
    accuracy        :   float or None
                        Result of the last ``run()``; ``None`` before that
    save_fname      :   str or None
                        Checkpoint path of the last ``run()``; ``None`` before
                        that
    '''
    def __init__(self, model, batch_size, epochs,
            train_function, learning_rate, ignore_saved):
        self.model = model
        self.batch_size = batch_size
        self.epochs = epochs
        self.accuracy = None
        self.save_fname = None
        self.train_function = train_function
        # sadly, we cannot always retrieve this from any optimizer
        self.learning_rate = learning_rate
        self.ignore_saved = ignore_saved

    def run(self):
        '''Run the training process with the specified settings.'''

        self.save_fname = 'checkpoints/{name}_{batch}_{lr}_{epochs}_{opti}_{act}.ckpt'.format(
                name=self.model.__class__.__name__,
                batch=self.batch_size,
                lr=self.learning_rate,
                epochs=self.epochs,
                opti=self.model.opt.get_name(),
                act=self.model.act_fn.__name__
        )
        self.accuracy = self.train_function(self.model, self.batch_size,
                self.epochs, self.save_fname, return_records=False,
                record_step=30, ignore_saved=self.ignore_saved)

    def __str__(self):
        # accuracy is None until run() has finished; formatting None with a
        # float spec would raise a TypeError
        if self.accuracy is None:
            accuracy_text = 'n/a'
        else:
            accuracy_text = format(self.accuracy, '4.3f')
        return ('{opti:30}, learning rate={lr:5.4f}, batch size={bs:<5d}, '
                'epochs={epochs:<5d}, accuracy={acc}'.format(
                    lr=self.learning_rate,
                    opti=self.model.opt.get_name(),
                    bs=self.batch_size,
                    epochs=self.epochs,
                    acc=accuracy_text
                    )
                )


## Actual training model
Here we define the model we actually train; it implements the methods declared by the abstract base class above.

In [10]:
class Model(BaseModel):
    '''Smaller model so we clock in at < 4mb

    Graph: two blocks of (5x5 or 3x3 convolution -> batch normalization ->
    weighted 2x2 pooling), followed by a 512-unit fully connected layer and a
    10-unit logit layer trained with sparse softmax cross entropy.

    Attributes
    ----------
    x                   :   tf.Tensor
                            Input placeholder, shape (None, 32, 32, 1)
    y_                  :   tf.Tensor
                            Label placeholder, int32, shape (None,)
    mean_cross_entropy  :   tf.Tensor
                            Mean loss over the fed batch
    train_step          :   tf.Operation
                            One optimizer update on ``mean_cross_entropy``
    prediction          :   tf.Tensor
                            Index of the largest logit per sample (int32)
    accuracy            :   tf.Tensor
                            Fraction of samples where prediction == label
    '''

    def predict(self, session, data):
        '''Return the predicted class index for every sample in ``data``.'''
        # the prediction depends on the input only, so no labels are fed
        return session.run(self.prediction, feed_dict={self.x: data})

    def run_training_step(self, session, data, labels):
        '''Apply one optimizer step and return the batch's mean cross entropy.'''
        entropy, _ = session.run(
            [self.mean_cross_entropy, self.train_step],
            feed_dict={self.x: data, self.y_: labels})
        return entropy

    def get_accuracy(self, session, data, labels):
        '''Return the fraction of samples in ``data`` classified correctly.'''
        return session.run(self.accuracy,
                           feed_dict={self.x: data, self.y_: labels})

    def __init__(self, optimizer, activation):
        '''Build the graph in the current default TF graph.

        Parameters
        ----------
        optimizer   :   tf.train.Optimizer
                        Used to minimize the mean cross entropy
        activation  :   function
                        tf.nn function used in the convolutional layers and
                        the first fully connected layer
        '''
        super().__init__(optimizer, activation)

        ############################################################################
        #                             Define the graph                             #
        ############################################################################
        # It turns out that this network from ex03 is already capable of memorizing
        # the entire training or validation set, so we need to tweak generalization,
        # not capacity
        # In order to speed up convergence, we added batch normalization.
        # Our best effort was Adam optimizer with bs=32, lr=0.001 (only possible
        # because of norm)
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 1), name='input')
        y_ = tf.placeholder(dtype=tf.int32, shape=(None,), name='labels')

        self.x = x
        self.y_ = y_

        kernel_shape1 = (5, 5, 1, 8)
        activation1 = conv_layer(x, kernel_shape1, activation=activation)

        normalize1 = batch_norm_layer(activation1)

        pool1 = weighted_pool_layer(
            normalize1, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1)
        )

        kernel_shape2 = (3, 3, 8, 10)
        activation2 = conv_layer(pool1, kernel_shape2, activation=activation)

        normalize2 = batch_norm_layer(activation2)

        pool2 = weighted_pool_layer(
            normalize2, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1)
        )

        # two stride-2 poolings shrink 32x32 to 8x8; conv2 has 10 channels
        pool2_reshaped = tf.reshape(pool2, (-1, 8*8*10), name='reshaped1')
        fc1 = fully_connected(pool2_reshaped, 512, with_activation=True,
                activation=activation)

        # with_activation defaults to False, so this layer returns raw logits
        # (as the cross entropy op requires) and ``activation`` goes unused
        fc2_logit = fully_connected(fc1, 10, activation=activation)

        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=fc2_logit,
                                                                labels=y_)
        mean_cross_entropy = tf.reduce_mean(cross_entropy)
        self.mean_cross_entropy = mean_cross_entropy
        train_step = optimizer.minimize(mean_cross_entropy)
        self.train_step = train_step
        self.prediction = tf.argmax(fc2_logit, 1, output_type=tf.int32)

        # check if neuron firing strongest coincides with the real label
        correct_prediction = tf.equal(self.prediction, y_)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        self.accuracy = accuracy


## Training Functionality
Here we are creating a function that will train our network. Additionally the trained weights are saved.

In [11]:
def train_model(model, batch_size, epochs, save_fname, return_records=False,
        record_step=20, ignore_saved=False):
    '''Train a model on the SVHN dataset.

    The model is checkpointed to ``save_fname`` whenever a validation
    measurement beats the best one seen so far.

    Parameters
    ----------
    model           :   Model (defined above)
                        The training model.
    batch_size      :   int
                        Size of training batch.
    epochs          :   int
                        Number of times to visit the entire training set.
    save_fname      :   string
                        The filename of the file carrying all the learned
                        variables.
    return_records  :   bool
                        Determines whether only the final accuracy (False) or a
                        history of all entropies and accuracies is returned.
    record_step     :   int
                        Accuracy on the validation set will be recorded every
                        ``record_step`` training steps.
    ignore_saved    :   bool
                        Do not load saved weights, if found

    Returns
    -------
    float OR tuple
            If ``return_records`` is set, the tuple ``(entropies,
            accuracies)`` is returned. Else the best validation accuracy is
            returned. When saved weights are restored instead of training,
            the accuracy is that of the restored weights and ``entropies`` is
            empty.

    Raises
    ------
    RuntimeError
            If the latest recorded validation accuracy is below 0.2 during
            the second epoch.

    '''

    svhn = SVHN()

    # keep records of performance
    entropies = []
    accuracies = []
    best_accuracy = 0

    with tf.Session() as sess:
        saver = tf.train.Saver()

        def validation_accuracy():
            # accuracy of the current weights on the whole validation set
            return model.get_accuracy(sess, svhn._validation_data,
                                      svhn._validation_labels)

        def save_if_best(accuracy):
            # checkpoint the weights whenever they beat the best seen so far
            nonlocal best_accuracy
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                print('Saving model with accuracy %f.' % accuracy)
                saver.save(sess, save_fname)

        ########################################################################
        #                             Load weights                             #
        ########################################################################
        if not ignore_saved and os.path.exists(save_fname + '.meta'):
            print('Using saved weights.')
            saver.restore(sess, save_fname)
            final_accuracy = validation_accuracy()
            if return_records:
                # keep the return shape consistent with the training path
                return entropies, [final_accuracy]
            return final_accuracy

        ########################################################################
        #                               Training                               #
        ########################################################################
        # Saver.save() does not create missing parent directories.
        save_dir = os.path.dirname(save_fname)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        sess.run(tf.global_variables_initializer())

        # number of training steps
        training_step = 0
        # latest validation measurement and the step it was taken at
        val_accuracy = None
        recorded_at = None

        for epoch in range(epochs):
            print('Starting epoch %d' % epoch)

            # run one batch
            for data, labels in svhn.get_training_batch(batch_size):
                entropy = model.run_training_step(sess, data, labels)
                entropies.append(entropy)

                # compute validation accuracy every record_step steps
                if training_step % record_step == 0:
                    val_accuracy = validation_accuracy()
                    accuracies.append(val_accuracy)
                    recorded_at = training_step
                    print('Current validation accuracy %f' % val_accuracy)
                    save_if_best(val_accuracy)

                training_step += 1

                # stop early if convergence too slow
                if epoch == 1 and val_accuracy is not None \
                        and val_accuracy < 0.2:
                    raise RuntimeError('This isn\'t going anywhere.')

        ########################################################################
        #                 Make final recordings, if necessary                  #
        ########################################################################
        if recorded_at is None or recorded_at != training_step - 1:
            # the weights changed since the last record (or none was taken)
            final_accuracy = validation_accuracy()
            accuracies.append(final_accuracy)
            save_if_best(final_accuracy)

        ########################################################################
        #                            Return results                            #
        ########################################################################
        if return_records:
            return entropies, accuracies
        return best_accuracy



In [23]:
# Train the final configuration (Adam, learning rate 0.001, ReLU activations)
# from scratch and keep the full loss/accuracy history for plotting.
optimizer_cls = tf.train.AdamOptimizer
optimizer = optimizer_cls(0.001)
model = Model(optimizer, tf.nn.relu)
epochs = 15
batch_size = 32
entropies, accuracies = train_model(
    model, batch_size, epochs, "./weights/",
    return_records=True, record_step=20, ignore_saved=True)

### Plot entropies and accuracies

In [26]:
# Training loss: one value per training step.
plt.title('Entropy')
plt.xlabel('Train step')
plt.ylabel('Entropy')
plt.plot(entropies)
plt.show()

# Validation accuracy: one value per recording.
plt.title('Accuracy')
plt.xlabel('Record step')
plt.ylabel('Accuracy')
plt.plot(accuracies)
plt.show()

## Evaluate the model
We have not fully understood how TensorFlow handles the files generated by `saver.save()`; within this notebook, it refuses to load them. You will have to figure out for yourself how to load them properly, or take a look at our `*.py` files.

In [27]:
# Evaluate the restored weights on the test set, batch by batch.
# Relies on the notebook globals ``model`` (training cell) and ``svhn``.
with tf.Session() as session:
    saver = tf.train.Saver()
    saver.restore(session,  tf.train.latest_checkpoint("./weights/"))

    weighted_accuracy_sum = 0.0
    n_test_samples = 0
    for images, labels in svhn.get_test_batch(300):
        # Go through the model instead of the names ``accuracy``, ``x`` and
        # ``desired``, which are not defined anywhere in this notebook.
        batch_accuracy = model.get_accuracy(session, images, labels)
        # weight by batch size so a smaller last batch is not over-counted
        weighted_accuracy_sum += batch_accuracy * len(labels)
        n_test_samples += len(labels)

if n_test_samples:
    test_accuracy = weighted_accuracy_sum / n_test_samples
else:
    # no test batches were produced
    test_accuracy = float('nan')

print("Test Accuracy: " + str(test_accuracy))