3 Layer Newtork on MNIST

In [1]:
import numpy as np
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

images, labels = (x_train[0:1000].reshape(1000, 28*28)/255, y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10))
for i, l in enumerate(labels):
    one_hot_labels[i][l] = 1
labels = one_hot_labels

test_images = x_test.reshape(len(x_test), 28*28)/255
test_labels = np.zeros((len(y_test),10))
for i, l in enumerate(y_test):
    test_labels[i][l] = 1
    
np.random.seed(1)

def relu(x):
    return (x >= 0) * x

def relu2deriv(x):
    return (x >= 0)

Using TensorFlow backend.


In [2]:
alpha = 0.005 
iterations = 300 
hidden_size = 40 
pixels_per_image = 784 
num_labels = 10

weights_0 = 0.2*np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1 = 0.2*np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error = 0
    correct_cnt = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = labels[i:i+1] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1.T) * relu2deriv(layer_1)
        
        weights_1 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    print("---------------------")

Iteration: 0
Train-Err: 0.722
Train-Accuracy: 0.537
---------------------
Iteration: 1
Train-Err: 0.512
Train-Accuracy: 0.753
---------------------
Iteration: 2
Train-Err: 0.448
Train-Accuracy: 0.804
---------------------
Iteration: 3
Train-Err: 0.411
Train-Accuracy: 0.837
---------------------
Iteration: 4
Train-Err: 0.385
Train-Accuracy: 0.846
---------------------
Iteration: 5
Train-Err: 0.367
Train-Accuracy: 0.863
---------------------
Iteration: 6
Train-Err: 0.352
Train-Accuracy: 0.876
---------------------
Iteration: 7
Train-Err: 0.340
Train-Accuracy: 0.884
---------------------
Iteration: 8
Train-Err: 0.329
Train-Accuracy: 0.892
---------------------
Iteration: 9
Train-Err: 0.320
Train-Accuracy: 0.897
---------------------
Iteration: 10
Train-Err: 0.312
Train-Accuracy: 0.901
---------------------
Iteration: 11
Train-Err: 0.304
Train-Accuracy: 0.904
---------------------
Iteration: 12
Train-Err: 0.297
Train-Accuracy: 0.91
---------------------
Iteration: 13
Train-Err: 0.291
Train

Iteration: 110
Train-Err: 0.161
Train-Accuracy: 0.984
---------------------
Iteration: 111
Train-Err: 0.161
Train-Accuracy: 0.984
---------------------
Iteration: 112
Train-Err: 0.161
Train-Accuracy: 0.985
---------------------
Iteration: 113
Train-Err: 0.160
Train-Accuracy: 0.985
---------------------
Iteration: 114
Train-Err: 0.160
Train-Accuracy: 0.986
---------------------
Iteration: 115
Train-Err: 0.159
Train-Accuracy: 0.986
---------------------
Iteration: 116
Train-Err: 0.159
Train-Accuracy: 0.986
---------------------
Iteration: 117
Train-Err: 0.158
Train-Accuracy: 0.986
---------------------
Iteration: 118
Train-Err: 0.158
Train-Accuracy: 0.986
---------------------
Iteration: 119
Train-Err: 0.158
Train-Accuracy: 0.986
---------------------
Iteration: 120
Train-Err: 0.157
Train-Accuracy: 0.986
---------------------
Iteration: 121
Train-Err: 0.157
Train-Accuracy: 0.986
---------------------
Iteration: 122
Train-Err: 0.156
Train-Accuracy: 0.986
---------------------
Iteration: 1

Iteration: 220
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 221
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 222
Train-Err: 0.125
Train-Accuracy: 0.998
---------------------
Iteration: 223
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 224
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 225
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 226
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 227
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 228
Train-Err: 0.124
Train-Accuracy: 0.998
---------------------
Iteration: 229
Train-Err: 0.123
Train-Accuracy: 0.998
---------------------
Iteration: 230
Train-Err: 0.123
Train-Accuracy: 0.998
---------------------
Iteration: 231
Train-Err: 0.123
Train-Accuracy: 0.998
---------------------
Iteration: 232
Train-Err: 0.123
Train-Accuracy: 0.998
---------------------
Iteration: 2

In [3]:
error = 0
correct_cnt = 0

for i in range(len(test_images)):
    layer_0 = test_images[i:i+1]
    layer_1 = relu(np.dot(layer_0, weights_0))
    layer_2 = np.dot(layer_1, weights_1)
    
    error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
    correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
    
print("Test-Err: "+str((error/float(len(images)))/10)[0:5])
print("Test-Accuracy: "+str((correct_cnt/float(len(images)))/10))

Test-Err: 0.614
Test-Accuracy: 0.7182999999999999


In [4]:
for j in range(iterations):
    error = 0
    correct_cnt = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = labels[i:i+1] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1.T) * relu2deriv(layer_1)
        
        weights_1 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    
    test_err = 0
    test_correct_cnt = 0
    
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        test_err += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    print("Test-Err: "+str((test_err/float(len(images)))/10)[0:5])
    print("Test-Accuracy: "+str((test_correct_cnt/float(len(images)))/10))
    print("---------------------")

Iteration: 0
Train-Err: 0.113
Train-Accuracy: 0.999
Test-Err: 0.614
Test-Accuracy: 0.7182999999999999
---------------------
Iteration: 1
Train-Err: 0.113
Train-Accuracy: 0.999
Test-Err: 0.615
Test-Accuracy: 0.7184
---------------------
Iteration: 2
Train-Err: 0.113
Train-Accuracy: 0.999
Test-Err: 0.616
Test-Accuracy: 0.7177
---------------------
Iteration: 3
Train-Err: 0.112
Train-Accuracy: 0.999
Test-Err: 0.617
Test-Accuracy: 0.7173
---------------------
Iteration: 4
Train-Err: 0.112
Train-Accuracy: 0.999
Test-Err: 0.617
Test-Accuracy: 0.7178
---------------------
Iteration: 5
Train-Err: 0.112
Train-Accuracy: 0.999
Test-Err: 0.618
Test-Accuracy: 0.7178
---------------------
Iteration: 6
Train-Err: 0.112
Train-Accuracy: 0.999
Test-Err: 0.619
Test-Accuracy: 0.7177
---------------------
Iteration: 7
Train-Err: 0.112
Train-Accuracy: 0.999
Test-Err: 0.619
Test-Accuracy: 0.7171000000000001
---------------------
Iteration: 8
Train-Err: 0.112
Train-Accuracy: 0.999
Test-Err: 0.620
Test-Accurac

Test-Err: 0.673
Test-Accuracy: 0.7007
---------------------
Iteration: 73
Train-Err: 0.107
Train-Accuracy: 0.999
Test-Err: 0.674
Test-Accuracy: 0.701
---------------------
Iteration: 74
Train-Err: 0.107
Train-Accuracy: 0.999
Test-Err: 0.674
Test-Accuracy: 0.701
---------------------
Iteration: 75
Train-Err: 0.107
Train-Accuracy: 0.999
Test-Err: 0.675
Test-Accuracy: 0.7008
---------------------
Iteration: 76
Train-Err: 0.107
Train-Accuracy: 0.999
Test-Err: 0.676
Test-Accuracy: 0.7006
---------------------
Iteration: 77
Train-Err: 0.107
Train-Accuracy: 0.999
Test-Err: 0.676
Test-Accuracy: 0.7005
---------------------
Iteration: 78
Train-Err: 0.107
Train-Accuracy: 0.999
Test-Err: 0.677
Test-Accuracy: 0.7005
---------------------
Iteration: 79
Train-Err: 0.107
Train-Accuracy: 0.999
Test-Err: 0.678
Test-Accuracy: 0.7007
---------------------
Iteration: 80
Train-Err: 0.107
Train-Accuracy: 0.998
Test-Err: 0.679
Test-Accuracy: 0.7007
---------------------
Iteration: 81
Train-Err: 0.107
Train-A

Test-Err: 0.711
Test-Accuracy: 0.6939
---------------------
Iteration: 144
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.711
Test-Accuracy: 0.6942
---------------------
Iteration: 145
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.711
Test-Accuracy: 0.6942999999999999
---------------------
Iteration: 146
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.711
Test-Accuracy: 0.6944
---------------------
Iteration: 147
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.712
Test-Accuracy: 0.6940999999999999
---------------------
Iteration: 148
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.712
Test-Accuracy: 0.6939
---------------------
Iteration: 149
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.712
Test-Accuracy: 0.6936
---------------------
Iteration: 150
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.712
Test-Accuracy: 0.6932
---------------------
Iteration: 151
Train-Err: 0.105
Train-Accuracy: 0.998
Test-Err: 0.713
Test-Accuracy: 0.6928
---------------------
Iter

Test-Err: 0.734
Test-Accuracy: 0.6868000000000001
---------------------
Iteration: 215
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.734
Test-Accuracy: 0.6867
---------------------
Iteration: 216
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.735
Test-Accuracy: 0.6865
---------------------
Iteration: 217
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.735
Test-Accuracy: 0.6863
---------------------
Iteration: 218
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.735
Test-Accuracy: 0.6858
---------------------
Iteration: 219
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.736
Test-Accuracy: 0.6855
---------------------
Iteration: 220
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.736
Test-Accuracy: 0.6851
---------------------
Iteration: 221
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.736
Test-Accuracy: 0.6849000000000001
---------------------
Iteration: 222
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.736
Test-Accuracy: 0.6851
---------------------
Iter

Test-Err: 0.755
Test-Accuracy: 0.6746000000000001
---------------------
Iteration: 286
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.755
Test-Accuracy: 0.6743
---------------------
Iteration: 287
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.755
Test-Accuracy: 0.674
---------------------
Iteration: 288
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.755
Test-Accuracy: 0.6738000000000001
---------------------
Iteration: 289
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.755
Test-Accuracy: 0.6736
---------------------
Iteration: 290
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.755
Test-Accuracy: 0.6736
---------------------
Iteration: 291
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.756
Test-Accuracy: 0.6736
---------------------
Iteration: 292
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.756
Test-Accuracy: 0.6735
---------------------
Iteration: 293
Train-Err: 0.104
Train-Accuracy: 0.998
Test-Err: 0.756
Test-Accuracy: 0.6733
---------------------
Itera

Dropout

In [5]:
alpha = 0.005 
iterations = 300 
hidden_size = 40 
pixels_per_image = 784 
num_labels = 10

weights_0 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error = 0
    correct_cnt = 0
    test_err = 0
    test_correct_cnt = 0
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        dropout = np.random.randint(2, size = layer_1.shape) #dropout
        layer_1 *= dropout * 2
        layer_2 = np.dot(layer_1, weights_1)
        
        error += np.sum((labels[i:i+1] - layer_2) ** 2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        layer_2_delta = labels[i:i+1] - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout
        
        weights_1 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0 += alpha * layer_0.T.dot(layer_1_delta)
        
    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0))
        layer_2 = np.dot(layer_1, weights_1)
        
        test_err = np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    print("Test-Err: "+str((test_err/float(len(images)))/10)[0:5])
    print("Test-Accuracy: "+str((test_correct_cnt/float(len(images)))/10))
    print("---------------------")

Iteration: 0
Train-Err: 0.903
Train-Accuracy: 0.275
Test-Err: 7.147
Test-Accuracy: 0.5112
---------------------
Iteration: 1
Train-Err: 0.747
Train-Accuracy: 0.453
Test-Err: 6.805
Test-Accuracy: 0.5759000000000001
---------------------
Iteration: 2
Train-Err: 0.696
Train-Accuracy: 0.499
Test-Err: 6.376
Test-Accuracy: 0.601
---------------------
Iteration: 3
Train-Err: 0.659
Train-Accuracy: 0.55
Test-Err: 4.698
Test-Accuracy: 0.6537
---------------------
Iteration: 4
Train-Err: 0.630
Train-Accuracy: 0.591
Test-Err: 5.714
Test-Accuracy: 0.6769000000000001
---------------------
Iteration: 5
Train-Err: 0.617
Train-Accuracy: 0.599
Test-Err: 4.568
Test-Accuracy: 0.6950000000000001
---------------------
Iteration: 6
Train-Err: 0.597
Train-Accuracy: 0.624
Test-Err: 4.577
Test-Accuracy: 0.7218
---------------------
Iteration: 7
Train-Err: 0.588
Train-Accuracy: 0.622
Test-Err: 2.812
Test-Accuracy: 0.7422
---------------------
Iteration: 8
Train-Err: 0.577
Train-Accuracy: 0.638
Test-Err: 2.765
Te

Iteration: 71
Train-Err: 0.463
Train-Accuracy: 0.766
Test-Err: 3.823
Test-Accuracy: 0.7905
---------------------
Iteration: 72
Train-Err: 0.471
Train-Accuracy: 0.737
Test-Err: 3.675
Test-Accuracy: 0.7971
---------------------
Iteration: 73
Train-Err: 0.470
Train-Accuracy: 0.761
Test-Err: 3.308
Test-Accuracy: 0.7893
---------------------
Iteration: 74
Train-Err: 0.458
Train-Accuracy: 0.774
Test-Err: 1.125
Test-Accuracy: 0.8018000000000001
---------------------
Iteration: 75
Train-Err: 0.460
Train-Accuracy: 0.763
Test-Err: 1.786
Test-Accuracy: 0.7799
---------------------
Iteration: 76
Train-Err: 0.465
Train-Accuracy: 0.76
Test-Err: 1.592
Test-Accuracy: 0.7883
---------------------
Iteration: 77
Train-Err: 0.455
Train-Accuracy: 0.772
Test-Err: 1.245
Test-Accuracy: 0.786
---------------------
Iteration: 78
Train-Err: 0.451
Train-Accuracy: 0.769
Test-Err: 1.417
Test-Accuracy: 0.796
---------------------
Iteration: 79
Train-Err: 0.458
Train-Accuracy: 0.764
Test-Err: 1.199
Test-Accuracy: 0.7

Iteration: 142
Train-Err: 0.462
Train-Accuracy: 0.772
Test-Err: 5.157
Test-Accuracy: 0.7974
---------------------
Iteration: 143
Train-Err: 0.427
Train-Accuracy: 0.796
Test-Err: 7.095
Test-Accuracy: 0.7934
---------------------
Iteration: 144
Train-Err: 0.431
Train-Accuracy: 0.793
Test-Err: 2.042
Test-Accuracy: 0.7885
---------------------
Iteration: 145
Train-Err: 0.446
Train-Accuracy: 0.79
Test-Err: 4.300
Test-Accuracy: 0.7931
---------------------
Iteration: 146
Train-Err: 0.444
Train-Accuracy: 0.772
Test-Err: 4.826
Test-Accuracy: 0.792
---------------------
Iteration: 147
Train-Err: 0.428
Train-Accuracy: 0.807
Test-Err: 2.938
Test-Accuracy: 0.7948000000000001
---------------------
Iteration: 148
Train-Err: 0.436
Train-Accuracy: 0.77
Test-Err: 3.087
Test-Accuracy: 0.795
---------------------
Iteration: 149
Train-Err: 0.452
Train-Accuracy: 0.771
Test-Err: 3.199
Test-Accuracy: 0.7857999999999999
---------------------
Iteration: 150
Train-Err: 0.427
Train-Accuracy: 0.784
Test-Err: 5.56

Iteration: 213
Train-Err: 0.429
Train-Accuracy: 0.804
Test-Err: 3.088
Test-Accuracy: 0.8027
---------------------
Iteration: 214
Train-Err: 0.413
Train-Accuracy: 0.811
Test-Err: 4.898
Test-Accuracy: 0.8012
---------------------
Iteration: 215
Train-Err: 0.399
Train-Accuracy: 0.815
Test-Err: 5.026
Test-Accuracy: 0.8005000000000001
---------------------
Iteration: 216
Train-Err: 0.407
Train-Accuracy: 0.817
Test-Err: 4.695
Test-Accuracy: 0.8054
---------------------
Iteration: 217
Train-Err: 0.401
Train-Accuracy: 0.814
Test-Err: 1.568
Test-Accuracy: 0.795
---------------------
Iteration: 218
Train-Err: 0.428
Train-Accuracy: 0.805
Test-Err: 2.617
Test-Accuracy: 0.8012
---------------------
Iteration: 219
Train-Err: 0.420
Train-Accuracy: 0.814
Test-Err: 2.540
Test-Accuracy: 0.7998999999999999
---------------------
Iteration: 220
Train-Err: 0.405
Train-Accuracy: 0.815
Test-Err: 3.049
Test-Accuracy: 0.7908000000000001
---------------------
Iteration: 221
Train-Err: 0.435
Train-Accuracy: 0.779

Iteration: 284
Train-Err: 0.418
Train-Accuracy: 0.812
Test-Err: 4.206
Test-Accuracy: 0.7924
---------------------
Iteration: 285
Train-Err: 0.412
Train-Accuracy: 0.812
Test-Err: 1.048
Test-Accuracy: 0.7868
---------------------
Iteration: 286
Train-Err: 0.433
Train-Accuracy: 0.794
Test-Err: 2.381
Test-Accuracy: 0.7864
---------------------
Iteration: 287
Train-Err: 0.422
Train-Accuracy: 0.816
Test-Err: 2.908
Test-Accuracy: 0.7794
---------------------
Iteration: 288
Train-Err: 0.419
Train-Accuracy: 0.807
Test-Err: 5.844
Test-Accuracy: 0.7845
---------------------
Iteration: 289
Train-Err: 0.410
Train-Accuracy: 0.811
Test-Err: 3.402
Test-Accuracy: 0.7907
---------------------
Iteration: 290
Train-Err: 0.426
Train-Accuracy: 0.805
Test-Err: 8.464
Test-Accuracy: 0.7893
---------------------
Iteration: 291
Train-Err: 0.416
Train-Accuracy: 0.805
Test-Err: 2.135
Test-Accuracy: 0.7842
---------------------
Iteration: 292
Train-Err: 0.425
Train-Accuracy: 0.807
Test-Err: 5.322
Test-Accuracy: 0.7

Batch Gradient Descent

In [3]:
np.random.seed(1)
    
alpha = 0.001
iterations = 300
hidden_size = 40 
pixels_per_image = 784 
num_labels = 10

weights_0 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

batch_size = 100 #batch gradient descent

for j in range(iterations):
    error = 0
    correct_cnt = 0
    test_err = 0
    test_correct_cnt = 0
    
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))

        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0,weights_0))
        dropout = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout * 2
        layer_2 = np.dot(layer_1,weights_1)

        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

            layer_2_delta = (labels[batch_start:batch_end]-layer_2)/batch_size
            layer_1_delta = layer_2_delta.dot(weights_1.T)* relu2deriv(layer_1)
            layer_1_delta *= dropout

            weights_1 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0 += alpha * layer_0.T.dot(layer_1_delta)
            

    for i in range(len(test_images)):
        layer_0 = test_images[i:i+1]
        layer_1 = relu(np.dot(layer_0,weights_0))
        layer_2 = np.dot(layer_1, weights_1)

        test_err += np.sum((test_labels[i:i+1] - layer_2) ** 2)
        test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
    print("Iteration: "+str(j))
    print("Train-Err: "+str(error/float(len(images)))[0:5])
    print("Train-Accuracy: "+str(correct_cnt/float(len(images))))
    print("Test-Err: "+str((test_err/float(len(images)))/10)[0:5])
    print("Test-Accuracy: "+str((test_correct_cnt/float(len(images)))/10))
    print("---------------------")

Iteration: 0
Train-Err: 1.112
Train-Accuracy: 0.136
Test-Err: 0.863
Test-Accuracy: 0.2907
---------------------
Iteration: 1
Train-Err: 0.888
Train-Accuracy: 0.276
Test-Err: 0.800
Test-Accuracy: 0.4359
---------------------
Iteration: 2
Train-Err: 0.833
Train-Accuracy: 0.352
Test-Err: 0.761
Test-Accuracy: 0.5233
---------------------
Iteration: 3
Train-Err: 0.789
Train-Accuracy: 0.418
Test-Err: 0.734
Test-Accuracy: 0.5570999999999999
---------------------
Iteration: 4
Train-Err: 0.767
Train-Accuracy: 0.455
Test-Err: 0.711
Test-Accuracy: 0.5775
---------------------
Iteration: 5
Train-Err: 0.753
Train-Accuracy: 0.448
Test-Err: 0.696
Test-Accuracy: 0.5926
---------------------
Iteration: 6
Train-Err: 0.723
Train-Accuracy: 0.507
Test-Err: 0.673
Test-Accuracy: 0.6121000000000001
---------------------
Iteration: 7
Train-Err: 0.706
Train-Accuracy: 0.524
Test-Err: 0.658
Test-Accuracy: 0.6282
---------------------
Iteration: 8
Train-Err: 0.693
Train-Accuracy: 0.529
Test-Err: 0.643
Test-Accurac

Iteration: 71
Train-Err: 0.543
Train-Accuracy: 0.663
Test-Err: 0.497
Test-Accuracy: 0.7463
---------------------
Iteration: 72
Train-Err: 0.527
Train-Accuracy: 0.69
Test-Err: 0.491
Test-Accuracy: 0.7477
---------------------
Iteration: 73
Train-Err: 0.541
Train-Accuracy: 0.67
Test-Err: 0.493
Test-Accuracy: 0.7498
---------------------
Iteration: 74
Train-Err: 0.530
Train-Accuracy: 0.672
Test-Err: 0.493
Test-Accuracy: 0.7492
---------------------
Iteration: 75
Train-Err: 0.532
Train-Accuracy: 0.674
Test-Err: 0.493
Test-Accuracy: 0.7551
---------------------
Iteration: 76
Train-Err: 0.526
Train-Accuracy: 0.682
Test-Err: 0.493
Test-Accuracy: 0.7485
---------------------
Iteration: 77
Train-Err: 0.537
Train-Accuracy: 0.668
Test-Err: 0.489
Test-Accuracy: 0.7492
---------------------
Iteration: 78
Train-Err: 0.531
Train-Accuracy: 0.673
Test-Err: 0.492
Test-Accuracy: 0.751
---------------------
Iteration: 79
Train-Err: 0.526
Train-Accuracy: 0.689
Test-Err: 0.492
Test-Accuracy: 0.7496
--------

Iteration: 143
Train-Err: 0.495
Train-Accuracy: 0.714
Test-Err: 0.462
Test-Accuracy: 0.7674000000000001
---------------------
Iteration: 144
Train-Err: 0.496
Train-Accuracy: 0.693
Test-Err: 0.462
Test-Accuracy: 0.7698
---------------------
Iteration: 145
Train-Err: 0.500
Train-Accuracy: 0.715
Test-Err: 0.468
Test-Accuracy: 0.7642
---------------------
Iteration: 146
Train-Err: 0.501
Train-Accuracy: 0.705
Test-Err: 0.467
Test-Accuracy: 0.7748
---------------------
Iteration: 147
Train-Err: 0.513
Train-Accuracy: 0.706
Test-Err: 0.468
Test-Accuracy: 0.7687999999999999
---------------------
Iteration: 148
Train-Err: 0.490
Train-Accuracy: 0.725
Test-Err: 0.468
Test-Accuracy: 0.771
---------------------
Iteration: 149
Train-Err: 0.494
Train-Accuracy: 0.698
Test-Err: 0.468
Test-Accuracy: 0.7658
---------------------
Iteration: 150
Train-Err: 0.494
Train-Accuracy: 0.733
Test-Err: 0.468
Test-Accuracy: 0.7676000000000001
---------------------
Iteration: 151
Train-Err: 0.492
Train-Accuracy: 0.723

Iteration: 214
Train-Err: 0.470
Train-Accuracy: 0.725
Test-Err: 0.445
Test-Accuracy: 0.7913
---------------------
Iteration: 215
Train-Err: 0.466
Train-Accuracy: 0.741
Test-Err: 0.445
Test-Accuracy: 0.7877
---------------------
Iteration: 216
Train-Err: 0.459
Train-Accuracy: 0.753
Test-Err: 0.450
Test-Accuracy: 0.7846
---------------------
Iteration: 217
Train-Err: 0.456
Train-Accuracy: 0.742
Test-Err: 0.446
Test-Accuracy: 0.7925
---------------------
Iteration: 218
Train-Err: 0.468
Train-Accuracy: 0.748
Test-Err: 0.445
Test-Accuracy: 0.7902
---------------------
Iteration: 219
Train-Err: 0.467
Train-Accuracy: 0.735
Test-Err: 0.448
Test-Accuracy: 0.79
---------------------
Iteration: 220
Train-Err: 0.464
Train-Accuracy: 0.744
Test-Err: 0.447
Test-Accuracy: 0.787
---------------------
Iteration: 221
Train-Err: 0.451
Train-Accuracy: 0.731
Test-Err: 0.445
Test-Accuracy: 0.7882
---------------------
Iteration: 222
Train-Err: 0.470
Train-Accuracy: 0.747
Test-Err: 0.447
Test-Accuracy: 0.7826

Iteration: 285
Train-Err: 0.463
Train-Accuracy: 0.743
Test-Err: 0.443
Test-Accuracy: 0.7922
---------------------
Iteration: 286
Train-Err: 0.450
Train-Accuracy: 0.734
Test-Err: 0.432
Test-Accuracy: 0.7931
---------------------
Iteration: 287
Train-Err: 0.459
Train-Accuracy: 0.738
Test-Err: 0.436
Test-Accuracy: 0.7941
---------------------
Iteration: 288
Train-Err: 0.462
Train-Accuracy: 0.752
Test-Err: 0.435
Test-Accuracy: 0.7963
---------------------
Iteration: 289
Train-Err: 0.455
Train-Accuracy: 0.754
Test-Err: 0.434
Test-Accuracy: 0.79
---------------------
Iteration: 290
Train-Err: 0.468
Train-Accuracy: 0.731
Test-Err: 0.437
Test-Accuracy: 0.7908000000000001
---------------------
Iteration: 291
Train-Err: 0.466
Train-Accuracy: 0.745
Test-Err: 0.437
Test-Accuracy: 0.7901
---------------------
Iteration: 292
Train-Err: 0.463
Train-Accuracy: 0.743
Test-Err: 0.436
Test-Accuracy: 0.7943
---------------------
Iteration: 293
Train-Err: 0.468
Train-Accuracy: 0.746
Test-Err: 0.439
Test-Acc