In [None]:
import sys
import numpy as np
import nn

# CNNs on MNIST
In the third part of the exercise we will now apply CNNs to MNIST.

First, take a look at the neural network code I placed into the `nn` package in this repository. It should look familiar, as it is mainly the code you used in the last exercise. One thing that I added is a prototype (skeleton) implementation of convolution and pooling. You will find these in `nn/conv/layers.py`.

After you have completed exercises 2 a) and 2 b), you should go into that file and implement the missing pieces, which will essentially be the conv and pool functions you have already written, as well as their backward passes (which might be a bit trickier).

Once you have implemented those, come back here and make sure the following example works.

First, let us do gradient checking using your conv and pooling layers.

In [None]:
# Small network used only for gradient checking:
# Conv -> Pool -> Flatten -> FullyConnected -> LinearOutput.
input_shape = (1, 1, 28, 28)  # presumably (batch, channels, height, width) -- TODO confirm against nn.InputLayer
n_labels = 6
layers = [nn.InputLayer(input_shape)]

# NOTE(review): the MNIST training cell refers to nn.layers.Activation;
# confirm that nn.Activation resolves to the same class.
layers.append(nn.Conv(
                layers[-1],
                n_feats=2,
                filter_shape=(3,3),
                init_stddev=0.01,
                activation_fun=nn.Activation('relu'),
))
layers.append(nn.Pool(layers[-1]))
layers.append(nn.Flatten(layers[-1]))
layers.append(nn.FullyConnectedLayer(
                layers[-1],
                # One output unit per label, so the layer width always matches
                # the targets Y that are sized from n_labels.
                num_units=n_labels,
                init_stddev=0.1,
                activation_fun=None
))
layers.append(nn.LinearOutput(layers[-1]))
net = nn.NeuralNetwork(layers)

In [None]:
# Random inputs plus one randomly chosen one-hot target row per sample.
X = np.random.normal(size=input_shape)
n_samples = input_shape[0]
Y = np.zeros((n_samples, n_labels))
for row in range(n_samples):
    # Draw the label for this row and switch on the matching column.
    Y[row, np.random.randint(n_labels)] = 1.

In [None]:
# Perform gradient checking on the random inputs X and one-hot targets Y.
# This should go through if you implemented everything correctly!
net.check_gradients(X, Y)

# Train on MNIST
Finally, figure out a reasonable network architecture and train it on MNIST.

In [None]:
# You can load the MNIST data as
# [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)].
data = nn.data.mnist()

In [None]:
import sys
import numpy as np
import nn
import time
def _subsample(X, Y, n_samples):
    """Draw a random subset of at most `n_samples` paired rows from X and Y.

    One random permutation of the row indices of X is truncated to
    `n_samples` and applied to both arrays, so every input keeps its label.
    Assumes Y has at least as many rows as X -- verify against nn.data.mnist().

    Returns:
        A tuple (X_subset, Y_subset, idxs), where idxs holds the selected
        row indices.
    """
    idxs = np.random.permutation(X.shape[0])[:n_samples]
    return X[idxs], Y[idxs], idxs


# you can load the mnist data as [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]
data = nn.data.mnist()

# Downsample training data to make it a bit faster for testing this code
# (the lowercase name y_train is kept because the training call uses it).
n_train_samples = 10000
X_train, y_train, train_idxs = _subsample(data[0][0], data[0][1], n_train_samples)

# Validation subset.
n_valid_samples = 500
X_valid, Y_valid, valid_idxs = _subsample(data[1][0], data[1][1], n_valid_samples)

# Test subset.
n_test_samples = 500
X_test, Y_test, test_idxs = _subsample(data[2][0], data[2][1], n_test_samples)

# MNIST classifier: Input -> Conv(relu) -> Flatten -> FullyConnected(relu) -> Softmax.
input_shape = (None, 1, 28, 28)

input_layer = nn.InputLayer(input_shape)
conv_layer = nn.Conv(
    input_layer,
    n_feats=2,
    filter_shape=(3, 3),
    init_stddev=0.01,
    activation_fun=nn.layers.Activation('relu'),
)
flat_layer = nn.Flatten(conv_layer)
dense_layer = nn.FullyConnectedLayer(
    flat_layer,
    num_units=10,
    init_stddev=0.1,
    activation_fun=nn.layers.Activation('relu'),
)
output_layer = nn.SoftmaxOutput(dense_layer)

# The layers are listed in the order they were constructed, input first.
layers = [input_layer, conv_layer, flat_layer, dense_layer, output_layer]
net = nn.NeuralNetwork(layers)

# Record timestamps immediately before and after training so the elapsed
# time can be reported afterwards.
start_train_time = time.time()

train_errors, valid_errors = net.train(
    X_train,
    y_train,
    Xvalid=X_valid,
    Yvalid=Y_valid,
    learning_rate=0.1,
    max_epochs=15,
    batch_size=100,
    y_one_hot=True,
)
end_train_time = time.time()

# Build each report line once so the console output and output.txt cannot
# drift apart.
train_report = "Training errors: \n" + str(train_errors)
valid_report = "Validation errors: \n" + str(valid_errors)
print(train_report)
print(valid_report)

# Evaluate on the test subset only after the training curves have been shown.
test_error = net.classification_error(X_test, Y_test)
test_report = "Test error: {:.4f}".format(test_error)
print(test_report)

# The context manager closes the file even if one of the writes raises.
with open("output.txt", "w") as f:
    f.write(train_report + "\n")
    f.write(valid_report + "\n")
    f.write(test_report + "\n")
    f.write("Time: {:.2f}".format(end_train_time - start_train_time))
