In [2]:
# MNIST Neural Network from scratch with numpy

import numpy as np
import sys

data = np.load('mnist.npz')

In [3]:
x_train = data['x_train']
x_train = x_train[0:7000]
x_test = data['x_test']
x_test = x_test[0:1000]
y_train = data['y_train']
y_train = y_train[0:7000]
y_test = data['y_test']
y_test = y_test[0:1000]

print("x_train shape: ",x_train.shape)
print("x_test shape: ",x_test.shape)
print("y_train shape: ",y_train.shape)
print("y_test shape: ",y_test.shape)

x_train shape:  (7000, 28, 28)
x_test shape:  (1000, 28, 28)
y_train shape:  (7000,)
y_test shape:  (1000,)


In [4]:
images = x_train.reshape(len(x_train), 28*28)/255

print(images.shape)

(7000, 784)


In [5]:
labels = y_train

print(labels)
print("\n",labels.shape)

[5 0 4 ... 5 1 9]

 (7000,)


In [6]:
one_hot_labels = np.zeros((len(labels), 10))

In [7]:
for i, l in enumerate(labels):
    one_hot_labels[i][l]=1
    
labels = one_hot_labels

labels

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [8]:
one_hot_labels.shape

(7000, 10)

In [9]:
x_test.shape

(1000, 28, 28)

In [10]:
test_images = x_test.reshape(len(x_test), 28*28)/255
test_labels = np.zeros((len(y_test), 10))

In [11]:
# default NN without regularization
for i,l in enumerate(y_test):
    test_labels[i][l]=1
    
np.random.seed(1)
relu = lambda x: (x>0)*x
relu2deriv = lambda x: x>0

alpha = 0.005
iterations = 40
hidden_size = 40
pixels_per_image = 784
num_labels = 10

weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size))-0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels))-0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((labels[i:i+1]-layer_2)**2)
        layer_2_delta = (labels[i:i+1]-layer_2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        
    sys.stdout.write("\r "+" I: "+str(j)+" Error: "+str(error/float(len(images)))[0:5]+" Correct: "+str(correct_cnt/float(len(images))))
    
    if( j%10==0 or j==iterations-1 ):
        error_, correct_cnt = (0.0, 0)
    
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            error_ += np.sum((test_labels[i:i+1]-layer_2)**2)
            correct_cnt += int(np.argmax(layer_2)==np.argmax(test_labels))
        
        sys.stdout.write(" Test-Err: "+str(error_/float(len(test_images)))[0:5]+" Test-Acc: "+str(correct_cnt/float(len(test_images))))
        print()

  I: 0 Error: 0.434 Correct: 0.7848571428571428 Test-Err: 0.341 Test-Acc: 0.101
  I: 10 Error: 0.140 Correct: 0.956 Test-Err: 0.202 Test-Acc: 0.092
  I: 20 Error: 0.112 Correct: 0.9691428571428572 Test-Err: 0.191 Test-Acc: 0.092
  I: 30 Error: 0.096 Correct: 0.9778571428571429 Test-Err: 0.191 Test-Acc: 0.096
  I: 39 Error: 0.085 Correct: 0.9818571428571429 Test-Err: 0.190 Test-Acc: 0.099


In [12]:
# Dropout regularization version

for i,l in enumerate(y_test):
    test_labels[i][l]=1
    
np.random.seed(1)
relu = lambda x: (x>0)*x
relu2deriv = lambda x: x>0

alpha = 0.005
iterations = 300
hidden_size = 40
pixels_per_image = 784
num_labels = 10

weights_0_1 = 0.2*np.random.random((pixels_per_image, hidden_size))-0.1
weights_1_2 = 0.2*np.random.random((hidden_size, num_labels))-0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    
    for i in range(len(images)):
        layer_0 = images[i:i+1]
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)     # <---------  this
        layer_1 *= dropout_mask *2       # <---------------  this
        layer_2 = np.dot(layer_1, weights_1_2)
        
        error += np.sum((labels[i:i+1]-layer_2)**2)
        layer_2_delta = (labels[i:i+1]-layer_2)
        correct_cnt += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
        layer_1_delta *= dropout_mask                   # <----------------  this
        
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
        
    sys.stdout.write("\r "+" I: "+str(j)+" Error: "+str(error/float(len(images)))[0:5]+" Correct: "+str(correct_cnt/float(len(images))))
    
    if( j%10==0 or j==iterations-1 ):
        error_, correct_cnt = (0.0, 0)
    
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            error_ += np.sum((test_labels[i:i+1]-layer_2)**2)
            correct_cnt += int(np.argmax(layer_2)==np.argmax(test_labels))
        
        sys.stdout.write(" Test-Err: "+str(error_/float(len(test_images)))[0:5]+" Test-Acc: "+str(correct_cnt/float(len(test_images))))
        print()

  I: 0 Error: 0.675 Correct: 0.5207142857142857 Test-Err: 0.474 Test-Acc: 0.116
  I: 10 Error: 0.387 Correct: 0.8098571428571428 Test-Err: 0.289 Test-Acc: 0.095
  I: 20 Error: 0.370 Correct: 0.8275714285714286 Test-Err: 0.285 Test-Acc: 0.088
  I: 30 Error: 0.356 Correct: 0.8458571428571429 Test-Err: 0.269 Test-Acc: 0.095
  I: 40 Error: 0.348 Correct: 0.8537142857142858 Test-Err: 0.267 Test-Acc: 0.096
  I: 50 Error: 0.345 Correct: 0.8517142857142858 Test-Err: 0.273 Test-Acc: 0.101
  I: 60 Error: 0.339 Correct: 0.8567142857142858 Test-Err: 0.261 Test-Acc: 0.092
  I: 70 Error: 0.342 Correct: 0.8594285714285714 Test-Err: 0.269 Test-Acc: 0.095
  I: 80 Error: 0.328 Correct: 0.8702857142857143 Test-Err: 0.270 Test-Acc: 0.099
  I: 90 Error: 0.330 Correct: 0.869 Test-Err: 0.260 Test-Acc: 0.094
  I: 100 Error: 0.332 Correct: 0.8702857142857143 Test-Err: 0.268 Test-Acc: 0.1
  I: 110 Error: 0.324 Correct: 0.8722857142857143 Test-Err: 0.266 Test-Acc: 0.098
  I: 120 Error: 0.321 Correct: 0.873142857

In [13]:
# Batching regularization version with dropout method

import numpy as np
np.random.seed(1)

def relu(x):
    return (x>0)*x

def relu2deriv(x):
    return x>0

batch_size = 100
alpha, iterations = (0.001, 200)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)
weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1
for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))
        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)
        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))
            layer_2_delta = (labels[batch_start:batch_end]-layer_2)/batch_size
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)
            layer_1_delta *= dropout_mask
            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    
    sys.stdout.write("\r "+" I: "+str(j)+" Error: "+str(error/float(len(images)))[0:5]+" Correct: "+str(correct_cnt/float(len(images))))
    
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0
        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)
            
            error_ += np.sum((test_labels[i:i+1]-layer_2)**2)
            correct_cnt += int(np.argmax(layer_2)==np.argmax(test_labels))
        sys.stdout.write(" Test-Err: "+str(error_/float(len(test_images)))[0:5]+" Test-Acc: "+str(correct_cnt/float(len(test_images))))
        print()

  I: 0 Error: 0.836 Correct: 0.44142857142857145 Test-Err: 0.863 Test-Acc: 3.199
  I: 10 Error: 0.391 Correct: 0.8175714285714286 Test-Err: 1.195 Test-Acc: 5.819
  I: 20 Error: 0.342 Correct: 0.8564285714285714 Test-Err: 1.481 Test-Acc: 6.09
  I: 30 Error: 0.313 Correct: 0.8734285714285714 Test-Err: 1.747 Test-Acc: 6.21
  I: 40 Error: 0.300 Correct: 0.8835714285714286 Test-Err: 1.998 Test-Acc: 6.279
  I: 50 Error: 0.291 Correct: 0.8901428571428571 Test-Err: 2.238 Test-Acc: 6.328
  I: 60 Error: 0.282 Correct: 0.9004285714285715 Test-Err: 2.475 Test-Acc: 6.4
  I: 70 Error: 0.271 Correct: 0.91 Test-Err: 2.706 Test-Acc: 6.47
  I: 80 Error: 0.273 Correct: 0.9064285714285715 Test-Err: 2.934 Test-Acc: 6.444
  I: 90 Error: 0.261 Correct: 0.9125714285714286 Test-Err: 3.158 Test-Acc: 6.488
  I: 100 Error: 0.256 Correct: 0.916 Test-Err: 3.379 Test-Acc: 6.508
  I: 110 Error: 0.253 Correct: 0.9207142857142857 Test-Err: 3.597 Test-Acc: 6.546
  I: 120 Error: 0.251 Correct: 0.922 Test-Err: 3.811 Test-