In [None]:
import numpy as np, sys
from keras.datasets import mnist


(training_images, training_labels), (test_images, test_labels_) = mnist.load_data()

# Flatten the images from (1000, 28, 28) -> (1000, 784).
images = training_images[:1000].reshape(1000, 28*28) / 255
labels_ = training_labels[:1000]
labels = np.zeros((len(labels_), 10))
for i, label in enumerate(labels_):
    # For each row turn on the point at the index of the label.
    labels[i][label] = 1

test_images = test_images.reshape(len(test_images), 28*28) / 255
# Create a matrix of width 10 to store the output.
test_labels = np.zeros((len(test_labels_), 10))
for i, label in enumerate(test_labels_):
    # For each row turn on the point at the index of the label.
    test_labels[i][label] = 1
                       
np.random.seed(1)
relu = lambda x: (x >= 0) * x
relu_derivative = lambda x: x >= 0
alpha = 0.005
hidden_width = 40
iterations = 350
pixels_per_image = 28*28
number_of_labels = 10
                       
scale = lambda array: 0.2*array - 0.1
weights_0_1 = scale(np.random.random((pixels_per_image, hidden_width)))
weights_1_2 = scale(np.random.random((hidden_width, number_of_labels)))

for j in range(iterations):
    error = 0.0
    correct_count = 0
    
    for i in range( len(images)):
        goal_prediction = labels[i:i+1]
        layer_0 = images[i:i+1]
        layer_1 = relu(layer_0.dot(weights_0_1))

        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2

        layer_2 = layer_1.dot(weights_1_2)
        
#         print(f"Goal Prediction: {goal_prediction}")
#         print(f"Output: {layer_2}")
        error += np.sum((goal_prediction - layer_2)**2)
        correct_count += int(np.argmax(layer_2) == np.argmax(labels[i:i+1]))
        
        
        layer_2_delta = goal_prediction - layer_2
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu_derivative(layer_1)
        layer_1_delta *= dropout_mask
        
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    
    if j % 10 == 0:
        test_error = 0.0
        test_correct_count = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(layer_0.dot(weights_0_1))
            layer_2 = layer_1.dot(weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_count += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1])) 
        formatted_test_error = str(test_error / float(len(test_images)))[:5]
        formatted_test_correct = str(test_correct_count / float(len(test_images)))
    
        formatted_error = str(error / float(len(images)))[:5]
        formatted_correct = str(correct_count / float(len(images)))

        sys.stdout.write(f"[{j}]: Test-err: {formatted_test_error}, Test-Acc: {formatted_test_correct}, Error: {formatted_error}, Correct: {formatted_correct}\n")

# Batched Gradient Descent

Dropout involves randomly turning off a percenta

In [11]:
import numpy as np
np.random.seed(1)

relu = lambda x: (x >= 0) * x
relu2deriv = lambda output: output >= 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2 * np.random.random((pixels_per_image, hidden_size)) - 0.1
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

for j in range(iterations):
    error, correct_count = (0.0, 0)

    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size), (i+1) * batch_size)

        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0, weights_0_1))

        dropout_mask = np.random.randint(2, size=layer_1.shape)
        # Turn off and magnify it by (1/p)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1, weights_1_2)

        goal_predictions = labels[batch_start:batch_end]
        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_count += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

            layer_2_delta = (goal_predictions - layer_2) / batch_end
            layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)
            layer_1_delta *=  dropout_mask

            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
    
    if j % 10 == 0:
        test_error = 0.0
        test_correct_count = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0, weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))
        
        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))


I:0 Test-Err:0.974 Test-Acc:0.5828 Train-Err:1.435 Train-Acc:0.547
I:10 Test-Err:0.675 Test-Acc:1.1931 Train-Err:0.734 Train-Acc:0.547

In [None]:
import numpy as np
np.random.seed(1)

def relu(x):
    return (x >= 0) * x # returns x if x > 0

def relu2deriv(output):
    return output >= 0 # returns 1 for input > 0

batch_size = 100
alpha, iterations = (0.001, 300)
pixels_per_image, num_labels, hidden_size = (784, 10, 100)

weights_0_1 = 0.2*np.random.random((pixels_per_image,hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,num_labels)) - 0.1

for j in range(iterations):
    error, correct_cnt = (0.0, 0)
    for i in range(int(len(images) / batch_size)):
        batch_start, batch_end = ((i * batch_size),((i+1)*batch_size))

        layer_0 = images[batch_start:batch_end]
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        dropout_mask = np.random.randint(2,size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = np.dot(layer_1,weights_1_2)

        error += np.sum((labels[batch_start:batch_end] - layer_2) ** 2)
        for k in range(batch_size):
            correct_cnt += int(np.argmax(layer_2[k:k+1]) == np.argmax(labels[batch_start+k:batch_start+k+1]))

            layer_2_delta = (labels[batch_start:batch_end]-layer_2)/batch_size
            layer_1_delta = layer_2_delta.dot(weights_1_2.T)* relu2deriv(layer_1)
            layer_1_delta *= dropout_mask

            weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
            weights_0_1 += alpha * layer_0.T.dot(layer_1_delta)
            
    if(j%10 == 0):
        test_error = 0.0
        test_correct_cnt = 0

        for i in range(len(test_images)):
            layer_0 = test_images[i:i+1]
            layer_1 = relu(np.dot(layer_0,weights_0_1))
            layer_2 = np.dot(layer_1, weights_1_2)

            test_error += np.sum((test_labels[i:i+1] - layer_2) ** 2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[i:i+1]))

        sys.stdout.write("\n" + \
                         "I:" + str(j) + \
                         " Test-Err:" + str(test_error/ float(len(test_images)))[0:5] +\
                         " Test-Acc:" + str(test_correct_cnt/ float(len(test_images)))+\
                         " Train-Err:" + str(error/ float(len(images)))[0:5] +\
                         " Train-Acc:" + str(correct_cnt/ float(len(images))))