# Upgrading our MNIST Network

In [1]:
import numpy as np, sys
np.random.seed(1)

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255,
                  y_train[0:1000])


Using TensorFlow backend.


In [7]:
print(images.shape)
print(labels.shape)
images

(1000, 784)
(1000,)


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [8]:
one_hot_labels = np.zeros((len(labels),10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

In [11]:
one_hot_labels

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [14]:
test_images = x_test.reshape(len(x_test),28*28) / 255
test_labels = np.zeros((len(y_test),10))
test_labels
for i,l in enumerate(y_test):
    test_labels[i][l] = 1


In [79]:
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return 1 - (output ** 2)

def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [15]:
alpha, iterations = (2, 300)
pixels_per_image, num_labels = (784, 10)
batch_size = 128

input_rows = 28
input_cols = 28

kernel_rows = 3
kernel_cols = 3
num_kernels = 16

hidden_size = ((input_rows - kernel_rows) * (input_cols - kernel_cols)) * num_kernels

hidden_size

10000

In [18]:
# batch_size = 128, pixels_per_image = 784
# input matrix dim 128 * 784

0.02*np.random.random((pixels_per_image,hidden_size))-0.01
# layer_1 size = hidden_size
# weigth_0_1 = 784 * 1000

array([[ 0.00378155, -0.00668871,  0.00181522, ..., -0.00021026,
         0.00699899, -0.0067288 ],
       [-0.00897288,  0.0045621 ,  0.00647406, ..., -0.0033061 ,
         0.0064441 ,  0.00457256],
       [ 0.00946882, -0.00097439, -0.00889002, ..., -0.00665264,
        -0.00223658, -0.00528768],
       ...,
       [ 0.00063445, -0.00220583,  0.00676012, ...,  0.00487011,
         0.00623798,  0.00658959],
       [-0.00649892, -0.0057027 ,  0.00351253, ...,  0.00354247,
        -0.0024938 , -0.00778811],
       [-0.00795792,  0.0016482 , -0.00600537, ...,  0.00797021,
         0.00141039,  0.00384052]])

In [20]:
# weights_0_1 = 0.02*np.random.random((pixels_per_image,hidden_size))-0.01
kernels = 0.02*np.random.random((kernel_rows*kernel_cols, num_kernels))-0.01
print(kernels.shape)
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1


(9, 16)


In [24]:
def get_image_section(layer,row_from, row_to, col_from, col_to):
    section = layer[:,row_from:row_to,col_from:col_to]
    return section.reshape(-1,1,row_to-row_from, col_to-col_from)

In [None]:
for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        # move down of rows with batch_size
        batch_start, batch_end=((i * batch_size),((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        # reshape the 128x784 to 128x28x28 as image pixel
        # the layer_0.shape[0] is the input batch size
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28)
        layer_0.shape

        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                                         row_start,
                                         row_start+kernel_rows,
                                         col_start,
                                         col_start+kernel_cols)
                sects.append(sect)

        expanded_input = np.concatenate(sects,axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1],-1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0],-1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1,weights_1_2))

        for k in range(batch_size):
            labelset = labels[batch_start+k:batch_start+k+1]
            _inc = int(np.argmax(layer_2[k:k+1]) == 
                               np.argmax(labelset))
            correct_cnt += _inc

        layer_2_delta = (labels[batch_start:batch_end]-layer_2)\
                        / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * \
                        tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        l1d_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(l1d_reshape)
        kernels -= alpha * k_update
    
    test_correct_cnt = 0

    for i in range(len(test_images)):

        layer_0 = test_images[i:i+1]
#         layer_1 = tanh(np.dot(layer_0,weights_0_1))
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28)
        layer_0.shape

        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                                         row_start,
                                         row_start+kernel_rows,
                                         col_start,
                                         col_start+kernel_cols)
                sects.append(sect)

        expanded_input = np.concatenate(sects,axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1],-1)

        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0],-1))
        layer_2 = np.dot(layer_1,weights_1_2)

        test_correct_cnt += int(np.argmax(layer_2) == 
                                np.argmax(test_labels[i:i+1]))
    if(j % 1 == 0):
        sys.stdout.write("\n"+ \
         "I:" + str(j) + \
         " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+\
         " Train-Acc:" + str(correct_cnt/float(len(images))))


I:0 Test-Acc:0.0342 Train-Acc:0.048
I:1 Test-Acc:0.0383 Train-Acc:0.041
I:2 Test-Acc:0.0442 Train-Acc:0.04
I:3 Test-Acc:0.0518 Train-Acc:0.045
I:4 Test-Acc:0.0685 Train-Acc:0.053
I:5 Test-Acc:0.1023 Train-Acc:0.065
I:6 Test-Acc:0.1417 Train-Acc:0.081
I:7 Test-Acc:0.1826 Train-Acc:0.109
I:8 Test-Acc:0.2212 Train-Acc:0.127
I:9 Test-Acc:0.2685 Train-Acc:0.163
I:10 Test-Acc:0.3208 Train-Acc:0.186
I:11 Test-Acc:0.3696 Train-Acc:0.222
I:12 Test-Acc:0.403 Train-Acc:0.244
I:13 Test-Acc:0.4165 Train-Acc:0.289
I:14 Test-Acc:0.4058 Train-Acc:0.273
I:15 Test-Acc:0.3559 Train-Acc:0.263
I:16 Test-Acc:0.2657 Train-Acc:0.231
I:17 Test-Acc:0.116 Train-Acc:0.181
I:18 Test-Acc:0.0647 Train-Acc:0.098
I:19 Test-Acc:0.0444 Train-Acc:0.064