In [50]:
# MNIST Convolutional Neural Network Analysis from scratch with numpy

import numpy as np
import sys
np.random.seed(1)

data = np.load('mnist.npz')

In [51]:
x_train = data['x_train']
x_train = x_train[0:7000]
y_train = data['y_train']
y_train = y_train[0:7000]

x_test = data['x_test']
x_test = x_test[0:1000]
y_test = data['y_test']
y_test = y_test[0:1000]

In [52]:
images = x_train.reshape(len(x_train), 28*28)/25

labels = y_train

In [53]:
one_hot_labels = np.zeros((len(labels), 10))

for i,l in enumerate(labels):
    one_hot_labels[i][l]=1
    
labels = one_hot_labels

In [54]:
test_images = x_test.reshape((len(x_test), 28*28))/255

test_labels = np.zeros((len(y_test), 10))

for i,l in enumerate(y_test):
    test_labels[i][l]=1

In [55]:
# define applied function for CNN
def tanh(x):
    return np.tanh(x)

def tanh2deriv(x):
    return 1-(x**2)

def softmax(x):
    temp = np.exp(x)
    return temp/np.sum(temp, axis=1, keepdims=True)

In [56]:
# define kernels and some configurations for CNN

alpha = 2
iterations = 300
pixels_per_image = 28*28
num_labels = 10

batch_size = 128

input_rows = 28
input_cols = 28

kernel_rows = 3
kernel_cols = 3
num_kernels = 16

hidden_size = ((input_rows-kernel_rows)*(input_cols-kernel_cols))*num_kernels

kernels = 0.02 * np.random.random((kernel_rows*kernel_cols, num_kernels))-0.01

weights_1_2 = 0.2*np.random.random((hidden_size, num_labels))-0.1


In [57]:
def get_image_section(layer, row_from, row_to, col_from, col_to):
    section = layer[:, row_from:row_to, col_from:col_to]
    return section.reshape(-1,1, row_to-row_from, col_to-col_from)

In [58]:
for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end=((i * batch_size),((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28)
        layer_0.shape
        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                row_start,
                row_start+kernel_rows,
                col_start,
                col_start+kernel_cols)
                sects.append(sect)
        expanded_input = np.concatenate(sects,axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1],-1)
        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0],-1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1,weights_1_2))
        for k in range(batch_size):
            labelset = labels[batch_start+k:batch_start+k+1]
            _inc = int(np.argmax(layer_2[k:k+1]) ==
            np.argmax(labelset))
            correct_cnt += _inc
        layer_2_delta = (labels[batch_start:batch_end]-layer_2)/ (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * \
        tanh2deriv(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        l1d_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(l1d_reshape)
        kernels -= alpha * k_update
    test_correct_cnt = 0
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28)
        layer_0.shape
        sects = list()
        for row_start in range(layer_0.shape[1]-kernel_rows):
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0,
                                        row_start,
                                        row_start+kernel_rows,
                                        col_start,
                                        col_start+kernel_cols)
                sects.append(sect)

        expanded_input = np.concatenate(sects,axis=1)
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0]*es[1],-1)
        kernel_output = flattened_input.dot(kernels)
        layer_1 = tanh(kernel_output.reshape(es[0],-1))
        layer_2 = np.dot(layer_1,weights_1_2)
        test_correct_cnt += int(np.argmax(layer_2) ==
        np.argmax(test_labels[i:i+1]))
    if(j % 10 == 0):
        sys.stdout.write("\n"+ \
                        "I:" + str(j) + \
                        " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+\
                        " Train-Acc:" + str(correct_cnt/float(len(images))))


I:0 Test-Acc:0.509 Train-Acc:0.11928571428571429
I:10 Test-Acc:0.844 Train-Acc:0.748
I:20 Test-Acc:0.864 Train-Acc:0.7958571428571428
I:30 Test-Acc:0.883 Train-Acc:0.822
I:40 Test-Acc:0.891 Train-Acc:0.8485714285714285
I:50 Test-Acc:0.896 Train-Acc:0.8555714285714285
I:60 Test-Acc:0.894 Train-Acc:0.8732857142857143
I:70 Test-Acc:0.889 Train-Acc:0.878
I:80 Test-Acc:0.901 Train-Acc:0.8785714285714286
I:90 Test-Acc:0.903 Train-Acc:0.8821428571428571
I:100 Test-Acc:0.902 Train-Acc:0.8852857142857142
I:110 Test-Acc:0.901 Train-Acc:0.8924285714285715
I:120 Test-Acc:0.894 Train-Acc:0.8912857142857142
I:130 Test-Acc:0.898 Train-Acc:0.8982857142857142
I:140 Test-Acc:0.882 Train-Acc:0.9025714285714286
I:150 Test-Acc:0.899 Train-Acc:0.8965714285714286
I:160 Test-Acc:0.897 Train-Acc:0.8971428571428571
I:170 Test-Acc:0.899 Train-Acc:0.9065714285714286
I:180 Test-Acc:0.899 Train-Acc:0.909
I:190 Test-Acc:0.901 Train-Acc:0.9148571428571428
I:200 Test-Acc:0.895 Train-Acc:0.9162857142857143
I:210 Test-