3 Layer Newtork on MNIST

In [1]:
import numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000, 28*28)/255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28)/255
test_labels = np.zeros((len(y_test),10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1
    
np.random.seed(1)

def relu(x):
    return (x >= 0) * x

def relu2deriv(x):
    return (x >= 0)

Using TensorFlow backend.


In [2]:
alpha = 0.005 
iterations = 300 
hidden_size = 40 
pixels_per_image = 784 
num_labels = 10

weights_0 = 0.2*np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error = 0
    correct_cnt = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = labels[i:i+1] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1.T) * relu2deriv(layer_1)
        
        weights_1 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    print("---------------------")

Iteration: 0
Train-Err: 0.722
Train-Accuracy: 0.537
---------------------
Iteration: 1
Train-Err: 0.512
Train-Accuracy: 0.753
---------------------
Iteration: 2
Train-Err: 0.448
Train-Accuracy: 0.804
---------------------
Iteration: 3
Train-Err: 0.411
Train-Accuracy: 0.837
---------------------
Iteration: 4
Train-Err: 0.385
Train-Accuracy: 0.846
---------------------
Iteration: 5
Train-Err: 0.367
Train-Accuracy: 0.863
---------------------
Iteration: 6
Train-Err: 0.352
Train-Accuracy: 0.876
---------------------
Iteration: 7
Train-Err: 0.340
Train-Accuracy: 0.884
---------------------
Iteration: 8
Train-Err: 0.329
Train-Accuracy: 0.892
---------------------
Iteration: 9
Train-Err: 0.320
Train-Accuracy: 0.897
---------------------
Iteration: 10
Train-Err: 0.312
Train-Accuracy: 0.901
---------------------
Iteration: 11
Train-Err: 0.304
Train-Accuracy: 0.904
---------------------
Iteration: 12
Train-Err: 0.297
Train-Accuracy: 0.91
---------------------
Iteration: 13
Train-Err: 0.291
Train

Iteration: 110
Train-Err: 0.161
Train-Accuracy: 0.984
---------------------
Iteration: 111
Train-Err: 0.161
Train-Accuracy: 0.984
---------------------
Iteration: 112
Train-Err: 0.161
Train-Accuracy: 0.985
---------------------
Iteration: 113
Train-Err: 0.160
Train-Accuracy: 0.985
---------------------
Iteration: 114
Train-Err: 0.160
Train-Accuracy: 0.986
---------------------
Iteration: 115
Train-Err: 0.159
Train-Accuracy: 0.986
---------------------
Iteration: 116
Train-Err: 0.159
Train-Accuracy: 0.986
---------------------
Iteration: 117
Train-Err: 0.158
Train-Accuracy: 0.986
---------------------
Iteration: 118
Train-Err: 0.158
Train-Accuracy: 0.986
---------------------
Iteration: 119
Train-Err: 0.158
Train-Accuracy: 0.986
---------------------
Iteration: 120
Train-Err: 0.157
Train-Accuracy: 0.986
---------------------
Iteration: 121
Train-Err: 0.157
Train-Accuracy: 0.986
---------------------
Iteration: 122
Train-Err: 0.156
Train-Accuracy: 0.986
---------------------
Iteration: 1

Iteration: 218
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 219
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 220
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 221
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 222
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 223
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 224
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 225
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 226
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 227
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 228
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 229
Train-Err: 0.123
Train-Accuracy: 0.998
---------------------
Iteration: 230
Train-Err: 0.123
Train-Accuracy: 0.998
---------------------
Iteration: 2

In [13]:
error = 0
correct_cnt = 0

for i in range(len(test_images)):
    layer_0 = test_images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0))
    layer_2 = np.dot(layer_1, weights_1)
    
    error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
    
print("Test-Err: "+str((error/float(len(images)))/10)[0:5])
print("Test-Accuracy: "+str((correct_cnt/float(len(images)))/10))

Test-Err: 0.614
Test-Accuracy: 0.7182999999999999


In [17]:
for j in range(iterations):
    error = 0
    correct_cnt = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = labels[i:i+1] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1.T) * relu2deriv(layer_1)
        
        weights_1 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    
    test_err = 0
    test_correct_cnt = 0
    
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        test_err += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    print("Test-Err: "+str((test_err/float(len(images)))/10)[0:5])
    print("Test-Accuracy: "+str((test_correct_cnt/float(len(images)))/10))
    print("---------------------")

Iteration: 0
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.832
Test-Accuracy: 0.6262
---------------------
Iteration: 1
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.833
Test-Accuracy: 0.626
---------------------
Iteration: 2
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.833
Test-Accuracy: 0.6258
---------------------
Iteration: 3
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.833
Test-Accuracy: 0.6258
---------------------
Iteration: 4
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.833
Test-Accuracy: 0.6256999999999999
---------------------
Iteration: 5
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.834
Test-Accuracy: 0.6259
---------------------
Iteration: 6
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.834
Test-Accuracy: 0.6256999999999999
---------------------
Iteration: 7
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.834
Test-Accuracy: 0.6258
---------------------
Iteration: 8
Train-Err: 0.086
Train-Accuracy: 1.0
Test-Err: 0.835
Test-Accuracy: 0.62569999999999

Test-Err: 0.860
Test-Accuracy: 0.6219
---------------------
Iteration: 74
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.860
Test-Accuracy: 0.6216
---------------------
Iteration: 75
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.861
Test-Accuracy: 0.6214000000000001
---------------------
Iteration: 76
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.861
Test-Accuracy: 0.6214999999999999
---------------------
Iteration: 77
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.862
Test-Accuracy: 0.6214999999999999
---------------------
Iteration: 78
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.862
Test-Accuracy: 0.6214000000000001
---------------------
Iteration: 79
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.863
Test-Accuracy: 0.6213
---------------------
Iteration: 80
Train-Err: 0.083
Train-Accuracy: 1.0
Test-Err: 0.863
Test-Accuracy: 0.6211
---------------------
Iteration: 81
Train-Err: 0.083
Train-Accuracy: 1.0
Test-Err: 0.864
Test-Accuracy: 0.6211
---------------------
Iter

Test-Err: 0.891
Test-Accuracy: 0.6144000000000001
---------------------
Iteration: 146
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.892
Test-Accuracy: 0.6146
---------------------
Iteration: 147
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.892
Test-Accuracy: 0.6147
---------------------
Iteration: 148
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.893
Test-Accuracy: 0.6143
---------------------
Iteration: 149
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.893
Test-Accuracy: 0.6142000000000001
---------------------
Iteration: 150
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.894
Test-Accuracy: 0.6141
---------------------
Iteration: 151
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.894
Test-Accuracy: 0.6134000000000001
---------------------
Iteration: 152
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.895
Test-Accuracy: 0.6134000000000001
---------------------
Iteration: 153
Train-Err: 0.084
Train-Accuracy: 1.0
Test-Err: 0.895
Test-Accuracy: 0.6134000000000001
------

Test-Err: 0.938
Test-Accuracy: 0.6035
---------------------
Iteration: 218
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.939
Test-Accuracy: 0.6031
---------------------
Iteration: 219
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.940
Test-Accuracy: 0.6027
---------------------
Iteration: 220
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.941
Test-Accuracy: 0.6022000000000001
---------------------
Iteration: 221
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.942
Test-Accuracy: 0.6019
---------------------
Iteration: 222
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.943
Test-Accuracy: 0.602
---------------------
Iteration: 223
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.944
Test-Accuracy: 0.6017
---------------------
Iteration: 224
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.944
Test-Accuracy: 0.6017
---------------------
Iteration: 225
Train-Err: 0.085
Train-Accuracy: 1.0
Test-Err: 0.945
Test-Accuracy: 0.6017
---------------------
Iteration: 226
Train-Err: 0.085
T

Test-Err: 0.989
Test-Accuracy: 0.5995
---------------------
Iteration: 290
Train-Err: 0.087
Train-Accuracy: 1.0
Test-Err: 0.990
Test-Accuracy: 0.5995
---------------------
Iteration: 291
Train-Err: 0.087
Train-Accuracy: 1.0
Test-Err: 0.992
Test-Accuracy: 0.5993
---------------------
Iteration: 292
Train-Err: 0.087
Train-Accuracy: 1.0
Test-Err: 0.994
Test-Accuracy: 0.599
---------------------
Iteration: 293
Train-Err: 0.087
Train-Accuracy: 1.0
Test-Err: 0.996
Test-Accuracy: 0.5991
---------------------
Iteration: 294
Train-Err: 0.087
Train-Accuracy: 1.0
Test-Err: 0.998
Test-Accuracy: 0.5987
---------------------
Iteration: 295
Train-Err: 0.088
Train-Accuracy: 1.0
Test-Err: 1.000
Test-Accuracy: 0.5987
---------------------
Iteration: 296
Train-Err: 0.088
Train-Accuracy: 1.0
Test-Err: 1.001
Test-Accuracy: 0.5984
---------------------
Iteration: 297
Train-Err: 0.088
Train-Accuracy: 1.0
Test-Err: 1.002
Test-Accuracy: 0.5977
---------------------
Iteration: 298
Train-Err: 0.089
Train-Accurac

Dropout

In [18]:
alpha = 0.005 
iterations = 300 
hidden_size = 40 
pixels_per_image = 784 
num_labels = 10

weights_0 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error = 0
    correct_cnt = 0
    test_err = 0
    test_correct_cnt = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        dropout = np.random.randint(2, size = layer_1.shape) #dropout
        layer_1 *= dropout * 2
        layer_2 = np.dot(layer_1, weights_1)
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = labels[i:i+1] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout
        
        weights_1 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        test_err = np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    print("Test-Err: "+str((test_err/float(len(images)))/10)[0:5])
    print("Test-Accuracy: "+str((test_correct_cnt/float(len(images)))/10))
    print("---------------------")

Iteration: 0
Train-Err: 0.903
Train-Accuracy: 0.275
Test-Err: 7.147
Test-Accuracy: 0.5112
---------------------
Iteration: 1
Train-Err: 0.747
Train-Accuracy: 0.453
Test-Err: 6.805
Test-Accuracy: 0.5759000000000001
---------------------
Iteration: 2
Train-Err: 0.696
Train-Accuracy: 0.499
Test-Err: 6.376
Test-Accuracy: 0.601
---------------------
Iteration: 3
Train-Err: 0.659
Train-Accuracy: 0.55
Test-Err: 4.698
Test-Accuracy: 0.6537
---------------------
Iteration: 4
Train-Err: 0.630
Train-Accuracy: 0.591
Test-Err: 5.714
Test-Accuracy: 0.6769000000000001
---------------------
Iteration: 5
Train-Err: 0.617
Train-Accuracy: 0.599
Test-Err: 4.568
Test-Accuracy: 0.6950000000000001
---------------------
Iteration: 6
Train-Err: 0.597
Train-Accuracy: 0.624
Test-Err: 4.577
Test-Accuracy: 0.7218
---------------------
Iteration: 7
Train-Err: 0.588
Train-Accuracy: 0.622
Test-Err: 2.812
Test-Accuracy: 0.7422
---------------------
Iteration: 8
Train-Err: 0.577
Train-Accuracy: 0.638
Test-Err: 2.765
Te

Iteration: 71
Train-Err: 0.463
Train-Accuracy: 0.766
Test-Err: 3.823
Test-Accuracy: 0.7905
---------------------
Iteration: 72
Train-Err: 0.471
Train-Accuracy: 0.737
Test-Err: 3.675
Test-Accuracy: 0.7971
---------------------
Iteration: 73
Train-Err: 0.470
Train-Accuracy: 0.761
Test-Err: 3.308
Test-Accuracy: 0.7893
---------------------
Iteration: 74
Train-Err: 0.458
Train-Accuracy: 0.774
Test-Err: 1.125
Test-Accuracy: 0.8018000000000001
---------------------
Iteration: 75
Train-Err: 0.460
Train-Accuracy: 0.763
Test-Err: 1.786
Test-Accuracy: 0.7799
---------------------
Iteration: 76
Train-Err: 0.465
Train-Accuracy: 0.76
Test-Err: 1.592
Test-Accuracy: 0.7883
---------------------
Iteration: 77
Train-Err: 0.455
Train-Accuracy: 0.772
Test-Err: 1.245
Test-Accuracy: 0.786
---------------------
Iteration: 78
Train-Err: 0.451
Train-Accuracy: 0.769
Test-Err: 1.417
Test-Accuracy: 0.796
---------------------
Iteration: 79
Train-Err: 0.458
Train-Accuracy: 0.764
Test-Err: 1.199
Test-Accuracy: 0.7

Iteration: 142
Train-Err: 0.462
Train-Accuracy: 0.772
Test-Err: 5.157
Test-Accuracy: 0.7974
---------------------
Iteration: 143
Train-Err: 0.427
Train-Accuracy: 0.796
Test-Err: 7.095
Test-Accuracy: 0.7934
---------------------
Iteration: 144
Train-Err: 0.431
Train-Accuracy: 0.793
Test-Err: 2.042
Test-Accuracy: 0.7885
---------------------
Iteration: 145
Train-Err: 0.446
Train-Accuracy: 0.79
Test-Err: 4.300
Test-Accuracy: 0.7931
---------------------
Iteration: 146
Train-Err: 0.444
Train-Accuracy: 0.772
Test-Err: 4.826
Test-Accuracy: 0.792
---------------------
Iteration: 147
Train-Err: 0.428
Train-Accuracy: 0.807
Test-Err: 2.938
Test-Accuracy: 0.7948000000000001
---------------------
Iteration: 148
Train-Err: 0.436
Train-Accuracy: 0.77
Test-Err: 3.087
Test-Accuracy: 0.795
---------------------
Iteration: 149
Train-Err: 0.452
Train-Accuracy: 0.771
Test-Err: 3.199
Test-Accuracy: 0.7857999999999999
---------------------
Iteration: 150
Train-Err: 0.427
Train-Accuracy: 0.784
Test-Err: 5.56

Iteration: 213
Train-Err: 0.429
Train-Accuracy: 0.804
Test-Err: 3.088
Test-Accuracy: 0.8027
---------------------
Iteration: 214
Train-Err: 0.413
Train-Accuracy: 0.811
Test-Err: 4.898
Test-Accuracy: 0.8012
---------------------
Iteration: 215
Train-Err: 0.399
Train-Accuracy: 0.815
Test-Err: 5.026
Test-Accuracy: 0.8005000000000001
---------------------
Iteration: 216
Train-Err: 0.407
Train-Accuracy: 0.817
Test-Err: 4.695
Test-Accuracy: 0.8054
---------------------
Iteration: 217
Train-Err: 0.401
Train-Accuracy: 0.814
Test-Err: 1.568
Test-Accuracy: 0.795
---------------------
Iteration: 218
Train-Err: 0.428
Train-Accuracy: 0.805
Test-Err: 2.617
Test-Accuracy: 0.8012
---------------------
Iteration: 219
Train-Err: 0.420
Train-Accuracy: 0.814
Test-Err: 2.540
Test-Accuracy: 0.7998999999999999
---------------------
Iteration: 220
Train-Err: 0.405
Train-Accuracy: 0.815
Test-Err: 3.049
Test-Accuracy: 0.7908000000000001
---------------------
Iteration: 221
Train-Err: 0.435
Train-Accuracy: 0.779

Iteration: 284
Train-Err: 0.418
Train-Accuracy: 0.812
Test-Err: 4.206
Test-Accuracy: 0.7924
---------------------
Iteration: 285
Train-Err: 0.412
Train-Accuracy: 0.812
Test-Err: 1.048
Test-Accuracy: 0.7868
---------------------
Iteration: 286
Train-Err: 0.433
Train-Accuracy: 0.794
Test-Err: 2.381
Test-Accuracy: 0.7864
---------------------
Iteration: 287
Train-Err: 0.422
Train-Accuracy: 0.816
Test-Err: 2.908
Test-Accuracy: 0.7794
---------------------
Iteration: 288
Train-Err: 0.419
Train-Accuracy: 0.807
Test-Err: 5.844
Test-Accuracy: 0.7845
---------------------
Iteration: 289
Train-Err: 0.410
Train-Accuracy: 0.811
Test-Err: 3.402
Test-Accuracy: 0.7907
---------------------
Iteration: 290
Train-Err: 0.426
Train-Accuracy: 0.805
Test-Err: 8.464
Test-Accuracy: 0.7893
---------------------
Iteration: 291
Train-Err: 0.416
Train-Accuracy: 0.805
Test-Err: 2.135
Test-Accuracy: 0.7842
---------------------
Iteration: 292
Train-Err: 0.425
Train-Accuracy: 0.807
Test-Err: 5.322
Test-Accuracy: 0.7

Batch Gradient Descent

In [None]:
iterations = 300 
hidden_size = 40 
pixels_per_image = 784 
num_labels = 10

weights_0 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

batch_size = 100 #batch gradient descent

for j in range(iterations):
    error = 0
    correct_cnt = 0
    test_err = 0
    test_correct_cnt = 0
    
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), ((i+1) * batch_size))
        
        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0, weights_0))
        dropout = np.random.randint(2, size = layer_1.shape)
        layer_1 *= dropout * 2
        layer_2 = np.dot(layer_1, weights_1)
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_end+k+1]))
        
        layer_2_delta = labels[i:i+1] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout
        
        weights_1 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        test_err = np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    print("Test-Err: "+str((test_err/float(len(images)))/10)[0:5])
    print("Test-Accuracy: "+str((test_correct_cnt/float(len(images)))/10))
    print("---------------------")

Iteration: 0
Train-Err: 1.912
Train-Accuracy: 0.096
Test-Err: 0.000
Test-Accuracy: 0.0982
---------------------
Iteration: 1
Train-Err: 1.095
Train-Accuracy: 0.108
Test-Err: 0.000
Test-Accuracy: 0.10329999999999999
---------------------
Iteration: 2
Train-Err: 17.22
Train-Accuracy: 0.116
Test-Err: 2958.
Test-Accuracy: 0.0982
---------------------




Iteration: 3
Train-Err: nan
Train-Accuracy: 0.107
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 4
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 5
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 6
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 7
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 8
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 9
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 10
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 11
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 12
Train-Err: nan
Train-

Iteration: 79
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 80
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 81
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 82
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 83
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 84
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 85
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 86
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 87
Train-Err: nan
Train-Accuracy: 0.097
Test-Err: nan
Test-Accuracy: 0.098
---------------------
Iteration: 88
Train-Err: nan