Upgrading the MNIST Network

In [1]:
import numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
images, labels = (x_train[0:1000].reshape(1000, 28*28)/255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels), 10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28)/255
test_labels = np.zeros((len(y_test), 10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1
    
def tanh(x):
    return np.tanh(x)

def tanh2deriv(output):
    return (1 - (output ** 2))

def softmax(x):
    temp = np.exp(x)
    return temp/np.sum(temp, axis=1, keepdims = True)

Using TensorFlow backend.


In [2]:
np.random.seed(1)

alpha = 0.001
iterations = 100
hidden_size = 100
pixels_per_image = 784
num_labels = 10

weights_0 = 0.2*np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

batch_size = 100

for j in range(iterations):
    error = 0
    correct_cnt = 0
    test_err = 0
    test_correct_cnt = 0
    
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
        
        layer_0 = images[batch_start:batch_end]
        layer_1 = tanh(np.dot(layer_0, weights_0))
        dropout = np.random.randint(2, size = layer_1.shape)
        layer_1 = layer_1 * dropout
        layer_2 = softmax(np.dot(layer_1, weights_1))
        
        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt = int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))
            
            layer_2_delta = (labels[batch_start:batch_end] - layer_2)/batch_size
            layer_1_delta = layer_2_delta.dot(weights_1.T) * tanh2deriv(layer_1)
            layer_1_delta = layer_1_delta * dropout
            
            weights_1 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
        
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = tanh(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        test_err += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
            
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    print("Test-Accuracy: "+str((test_correct_cnt/float(len(images)))/10))
    print("---------------------")

Iteration: 0
Train-Err: 0.892
Train-Accuracy: 0.0
Test-Accuracy: 0.37839999999999996
---------------------
Iteration: 1
Train-Err: 0.863
Train-Accuracy: 0.0
Test-Accuracy: 0.576
---------------------
Iteration: 2
Train-Err: 0.830
Train-Accuracy: 0.0
Test-Accuracy: 0.6388
---------------------
Iteration: 3
Train-Err: 0.791
Train-Accuracy: 0.0
Test-Accuracy: 0.6618999999999999
---------------------
Iteration: 4
Train-Err: 0.750
Train-Accuracy: 0.001
Test-Accuracy: 0.6759000000000001
---------------------
Iteration: 5
Train-Err: 0.705
Train-Accuracy: 0.0
Test-Accuracy: 0.6860999999999999
---------------------
Iteration: 6
Train-Err: 0.655
Train-Accuracy: 0.0
Test-Accuracy: 0.6987
---------------------
Iteration: 7
Train-Err: 0.603
Train-Accuracy: 0.001
Test-Accuracy: 0.7055
---------------------
Iteration: 8
Train-Err: 0.562
Train-Accuracy: 0.001
Test-Accuracy: 0.7171000000000001
---------------------
Iteration: 9
Train-Err: 0.524
Train-Accuracy: 0.001
Test-Accuracy: 0.7285999999999999
--

Iteration: 81
Train-Err: 0.107
Train-Accuracy: 0.001
Test-Accuracy: 0.8672000000000001
---------------------
Iteration: 82
Train-Err: 0.104
Train-Accuracy: 0.001
Test-Accuracy: 0.8676
---------------------
Iteration: 83
Train-Err: 0.101
Train-Accuracy: 0.001
Test-Accuracy: 0.8679
---------------------
Iteration: 84
Train-Err: 0.100
Train-Accuracy: 0.001
Test-Accuracy: 0.8683
---------------------
Iteration: 85
Train-Err: 0.100
Train-Accuracy: 0.001
Test-Accuracy: 0.8678000000000001
---------------------
Iteration: 86
Train-Err: 0.099
Train-Accuracy: 0.001
Test-Accuracy: 0.8674
---------------------
Iteration: 87
Train-Err: 0.094
Train-Accuracy: 0.001
Test-Accuracy: 0.868
---------------------
Iteration: 88
Train-Err: 0.101
Train-Accuracy: 0.001
Test-Accuracy: 0.8686999999999999
---------------------
Iteration: 89
Train-Err: 0.095
Train-Accuracy: 0.001
Test-Accuracy: 0.8693
---------------------
Iteration: 90
Train-Err: 0.094
Train-Accuracy: 0.001
Test-Accuracy: 0.8692
-----------------