In [1]:
from keras.datasets import mnist
import numpy as np
import sys

# Fetch the MNIST train/test splits through Keras.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Work with the first 1000 training samples only. Each 28x28 image is
# flattened into a 784-element row and divided by 255 (presumably mapping
# 8-bit pixel intensities into [0, 1] -- confirm against the dataset dtype).
labels = y_train[:1000]
images = x_train[:1000].reshape((1000, 28 * 28)) / 255

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Zero-filled matrix with one row per training label and 10 columns
# (one column per class); the next cell sets the 1s.
one_hot_labels = np.zeros(shape=(len(labels), 10))

In [3]:
# Put a 1 in column labels[row] of every row, using integer-array indexing
# instead of an explicit Python loop.
one_hot_labels[np.arange(len(labels)), labels] = 1
# From here on `labels` names the one-hot matrix, not the raw class indices.
labels = one_hot_labels

In [4]:
# Flatten every test image to a 784-element row and apply the same /255
# scaling used for the training images.
test_images = x_test.reshape((x_test.shape[0], 28 * 28)) / 255
# One-hot encode the test labels: one row per sample, 10 class columns.
test_labels = np.zeros((y_test.shape[0], 10))
test_labels[np.arange(y_test.shape[0]), y_test] = 1

In [20]:
# Number of samples processed per weight update.
batch_size = 100
# Seed NumPy's global RNG so every np.random call that follows is repeatable.
np.random.seed(3)
def tanh(x):
    """Hyperbolic-tangent activation, applied element-wise through np.tanh."""
    activated = np.tanh(x)
    return activated
def tanh2deriv(output):
    """Return 1 - output**2, element-wise.

    This is the derivative of tanh written in terms of the activation's
    output value, so callers pass the already-activated layer rather than
    the pre-activation input.
    """
    squared = output ** 2
    return 1 - squared
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        x: array whose axis 1 holds the per-class scores of one sample
           (the reduction uses axis=1, so the input must be at least 2-D).

    Returns:
        Array of the same shape; every row is non-negative and sums to 1.
    """
    # Subtracting each row's maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would turn
    # the division below into inf/inf = nan) when scores are large.
    shifted = x - np.max(x, axis=1, keepdims=True)
    temp = np.exp(shifted)
    return temp / np.sum(temp, axis=1, keepdims=True)
# Training hyper-parameters and network dimensions.
alpha = 2                 # multiplier applied to every weight update
iterations = 300          # number of passes over the training images
hidden_size = 100         # width of the hidden layer
pixels_per_image = 784    # input width (28 * 28 flattened pixels)
num_labels = 10           # output width (one unit per class)

In [21]:
# Two-layer network trained with mini-batch gradient descent:
# input -> tanh hidden layer (with dropout) -> softmax output.
# Weights are drawn uniformly: input->hidden in [-0.01, 0.01),
# hidden->output in [-0.1, 0.1).
weights_0_1 = 0.02*np.random.random((pixels_per_image, hidden_size)) - 0.01
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

# Class index of every test sample. The one-hot test labels never change, so
# this is computed once rather than once per sample in every epoch.
test_targets = np.argmax(test_labels, axis=1)

for j in range(iterations):
    correct_cnt = 0

    # Only full batches are visited; a trailing partial batch would be skipped.
    for i in range(len(images) // batch_size):

        batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
        batch_labels = labels[batch_start:batch_end]

        # Forward pass. Each hidden unit is zeroed with probability 1/2 and
        # the survivors are doubled so the expected activation is unchanged.
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))

        # Training accuracy: compare predicted and true class per row.
        # (The previous per-sample loop sliced labels up to batch_end+k+1,
        # i.e. ~batch_size rows instead of one, and only gave the right
        # answer because argmax on the flattened slice hits row 0 first.)
        correct_cnt += int(np.sum(np.argmax(layer_2, axis=1) == np.argmax(batch_labels, axis=1)))

        # Backward pass.
        # NOTE(review): layer_2.shape[0] equals batch_size here, so the error
        # is divided by batch_size twice; this acts as an extra scaling of
        # alpha. Left as-is so the tuned alpha keeps producing the same run.
        layer_2_delta = (batch_labels - layer_2) / (batch_size * layer_2.shape[0])
        # NOTE(review): tanh2deriv receives layer_1 *after* dropout scaling
        # (entries are 0 or 2*tanh), so kept units use 1 - 4*tanh^2 rather
        # than 1 - tanh^2. Behaviour preserved; consider taking the
        # derivative before the mask is applied.
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh2deriv(layer_1)

        # Dropped units contributed nothing forward, so they get no gradient.
        layer_1_delta *= dropout_mask

        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)

    # Evaluate on the whole test set in one batched forward pass (no dropout).
    # Softmax is omitted because it does not change which class has the
    # largest score.
    layer_0 = test_images
    layer_1 = tanh(np.dot(layer_0, weights_0_1))
    layer_2 = np.dot(layer_1, weights_1_2)
    test_correct_cnt = int(np.sum(np.argmax(layer_2, axis=1) == test_targets))

    # Report every 10th epoch.
    if(j % 10 == 0):
        sys.stdout.write("\n" + \
                     " I: " + str(j) + \
                     " Test Acc: " + str(test_correct_cnt/float(len(test_images))) + \
                     " Train Acc: " + str(correct_cnt/float(len(images))))


 I: 0 Test Acc: 0.3163 Train Acc: 0.15
 I: 10 Test Acc: 0.6901 Train Acc: 0.713
 I: 20 Test Acc: 0.7137 Train Acc: 0.741
 I: 30 Test Acc: 0.7436 Train Acc: 0.779
 I: 40 Test Acc: 0.7743 Train Acc: 0.813
 I: 50 Test Acc: 0.7962 Train Acc: 0.824
 I: 60 Test Acc: 0.8131 Train Acc: 0.85
 I: 70 Test Acc: 0.8236 Train Acc: 0.856
 I: 80 Test Acc: 0.8294 Train Acc: 0.875
 I: 90 Test Acc: 0.8369 Train Acc: 0.874
 I: 100 Test Acc: 0.8404 Train Acc: 0.896
 I: 110 Test Acc: 0.8443 Train Acc: 0.896
 I: 120 Test Acc: 0.8483 Train Acc: 0.903
 I: 130 Test Acc: 0.8513 Train Acc: 0.892
 I: 140 Test Acc: 0.8523 Train Acc: 0.912
 I: 150 Test Acc: 0.8552 Train Acc: 0.914
 I: 160 Test Acc: 0.8578 Train Acc: 0.924
 I: 170 Test Acc: 0.8586 Train Acc: 0.92
 I: 180 Test Acc: 0.8599 Train Acc: 0.92
 I: 190 Test Acc: 0.8602 Train Acc: 0.924
 I: 200 Test Acc: 0.8623 Train Acc: 0.92
 I: 210 Test Acc: 0.8632 Train Acc: 0.933
 I: 220 Test Acc: 0.8644 Train Acc: 0.937
 I: 230 Test Acc: 0.8657 Train Acc: 0.933
 I: 240