# MNIST

### Fetch data

In [28]:
import sys, numpy as np
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

### Train

In [29]:
images, labels = (x_train[:1000].reshape(1000, 28*28)/255, y_train[0:1000])
one_hot_labels = np.zeros((len(labels), 10))
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28)/255
test_labels = np.zeros((len(y_test), 10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

np.random.seed(1)
relu = lambda x: (x>=0)*x
relu2deriv = lambda x: x>=0
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)
weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size))-0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels))-0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        error += np.sum((labels[i:i+1]-layer_2)**2)
        correct_cnt += int(np.argmax(layer_2)==np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1]-layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)
        weights_1_2 += alpha*layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha*layer_0.T.dot(layer_1_delta)
    sys.stdout.write("\r" + " I:" + str(j) + 
                    " Error:" + str((error/len(images))) +
                    " Correct:" + str(correct_cnt/len(images)))

 I:349 Error:0.10881979854066498 Correct:1.099

### Test 

In [30]:
if(j%10==0 or j==iterations-1):
    error = correct_cnt = (0.0, 0)
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        error += np.sum((test_labels[i:i+1]-layer_2)**2)
        correct_cnt += (np.argmax(layer_2)==np.argmax(test_labels[i:i+1]))
    sys.stdout.write(" Test-Err:" + str(error/len(test_images)) + 
                     " Test-Acc:" + str(correct_cnt/len(test_images)))
    print()

 Test-Err:[0.65340554 0.65340554] Test-Acc:[0.7073 0.7073]


### Dropout regularization

In [36]:
np.random.seed(1)
relu = lambda x: (x>=0)*x
relu2deriv = lambda x: x>=0
alpha, iterations, hidden_size, pixels_per_image, num_labels = (0.005, 350, 40, 784, 10)
weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size))-0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels))-0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask*2
        layer_2 = np.dot(layer_1, weights_1_2)
        error += np.sum((labels[i:i+1]-layer_2)**2)
        correct_cnt += int(np.argmax(layer_2)==np.argmax(labels[i:i+1]))
        layer_2_delta = (labels[i:i+1]-layer_2)
        layer_1_delta = layer_2_delta.dot(weights_1_2.T)*relu2deriv(layer_1)
        layer_1_delta *= dropout_mask
        weights_1_2 += alpha*layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha*layer_0.T.dot(layer_1_delta)
    if(j%10==0 or j==iterations-1):
        test_error = test_correct_cnt = (0.0, 0)
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            test_error += np.sum((test_labels[i:i+1]-layer_2)**2)
            test_correct_cnt += (np.argmax(layer_2)==np.argmax(test_labels[i:i+1]))
        sys.stdout.write(" Train-Err:" + str(error/len(test_images)) + 
                         " Train-Acc:" + str(correct_cnt/len(test_images)) +
                        " Test-Err:" + str(test_error/len(test_images)) + 
                        " Test-Acc:" + str(test_correct_cnt/len(test_images)))
    print()

 Train-Err:0.08854150378548294 Train-Acc:0.0289 Test-Err:[0.71811291 0.71811291] Test-Acc:[0.5418 0.5418]









 Train-Err:0.056442490520860096 Train-Acc:0.0647 Test-Err:[0.50199879 0.50199879] Test-Acc:[0.7365 0.7365]









 Train-Err:0.05305078554418434 Train-Acc:0.0681 Test-Err:[0.47819835 0.47819835] Test-Acc:[0.7621 0.7621]









 Train-Err:0.05081959946032416 Train-Acc:0.071 Test-Err:[0.45770377 0.45770377] Test-Acc:[0.7915 0.7915]









 Train-Err:0.049224387915888596 Train-Acc:0.0719 Test-Err:[0.44559247 0.44559247] Test-Acc:[0.7998 0.7998]









 Train-Err:0.04626158977664231 Train-Acc:0.0742 Test-Err:[0.43086597 0.43086597] Test-Acc:[0.8145 0.8145]









 Train-Err:0.04721649431897942 Train-Acc:0.0746 Test-Err:[0.44656634 0.44656634] Test-Acc:[0.7974 0.7974]









 Train-Err:0.04630812403222845 Train-Acc:0.0744 Test-Err:[0.44504726 0.44504726] Test-Acc:[0.7901 0.7901]









 Train-Err:0.04617832439825736 Train-Acc:0.0764 Test-Err:[0.42626659 0.42626659