In [16]:
# set up training and testing data
import numpy as np
from keras.datasets import mnist
np.random.seed(1)
(x_train, y_train), (x_test, y_test) = mnist.load_data()
input_rows, input_cols = (28, 28)
pixels_per_image = input_rows * input_cols
num_labels = 10 # [0, 9]

images =  x_train[0:1000].reshape(1000, pixels_per_image) / 255
categorical_labels = y_train[0:1000] # looks something like [2, 5, 1, 8, 0, ...]

one_hot_labels = np.zeros((len(categorical_labels), 10))
for batch, l in enumerate(categorical_labels):
    one_hot_labels[batch][l] = 1   # so if the number we are given is a 3, the label is [0, 0, 0, 1, 0, ...]
labels = one_hot_labels 

test_images = x_test.reshape(len(x_test), pixels_per_image) / 255
test_labels = np.zeros((len(y_test), 10))
for batch, l in enumerate(y_test):
    test_labels[batch][l] = 1

In [17]:
# define activation functions and their derivative functions
def tanh(x):
    return np.tanh(x)
def tanh_deriv(output):
    return 1 - (output ** 2)
def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis=1, keepdims=True)

In [18]:
# configure model
alpha = 2
iterations = 300
batch_size = 128
kernel_rows, kernel_cols = (3, 3)
num_kernels = 16

In [19]:
# initialize weights
hidden_size = ((input_rows - kernel_rows) * (input_cols - kernel_cols)) * num_kernels
kernel_weights = 0.02 * np.random.random((kernel_rows * kernel_cols, num_kernels)) - 0.01
weights_1_2 = 0.2 * np.random.random((hidden_size, num_labels)) - 0.1

### network shape: 
28 * 28 = 784  
convoluted layer with  16 3*3 kernel  
25 * 25 * 16 = 10,000 hidden layer  


In [20]:
def get_image_section(layer, row_from, row_to, col_from, col_to):
    section = layer[ : , row_from : row_to, col_from : col_to]
    return section.reshape(-1, 1, row_to - row_from, col_to - col_from)
# The first dimension (-1 in this case) represents the batch size. The use of -1 allows NumPy to automatically infer this dimension based on the size of the input array.
# The second dimension (1 in this case) represents the number of channels. For grayscale images, this is typically 1, while for RGB images, it would be 3.
# The third and fourth dimensions represent the height and width of the section extracted from the original image.
# from ChatGPT, might not be correct. I dont fully understand why the kernels are four dimensional

In [21]:
import sys
def printAccuracy(test_correct_cnt, correct_cnt, j):
    sys.stdout.write("\n" + \
            "I: " + str(j) + \
            " Test-Acc: " + str(test_correct_cnt / float(len(test_images))) + \
            " Train-Acc " + str(correct_cnt / float(len(images))))

In [22]:
def apply_convelution(layer_0):
    sections = list()
    for row_start in range(layer_0.shape[1] - kernel_rows):
        for col_start in range(layer_0.shape[2] - kernel_cols):
            sect = get_image_section(layer_0, row_start, row_start + kernel_rows, col_start, col_start + kernel_cols)
            sections.append(sect)

    expanded_input = np.concatenate(sections, axis=1)
    expanded_shape = expanded_input.shape
    flattened_input = expanded_input.reshape(expanded_shape[0] * expanded_shape[1], -1)

    kernel_output = flattened_input.dot(kernel_weights)
    return kernel_output, expanded_shape, flattened_input

In [23]:
for iteration in range(iterations):
    correct_cnt = 0
    for batch in range(int(len(images) / batch_size)):
        # get and format layer_0
        batch_start, batch_end = ((batch * batch_size), ((batch + 1) * batch_size))
        layer_0 = images[batch_start : batch_end]
        layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)

        # run neural network
        kernel_output, expanded_shape, flattened_input  = apply_convelution(layer_0) 
        layer_1 = tanh(kernel_output.reshape(expanded_shape[0], -1))
        dropout_mask = np.random.randint(2, size=layer_1.shape)
        layer_1 *= dropout_mask * 2
        layer_2 = softmax(np.dot(layer_1, weights_1_2))

        # get batch correctness
        for image_in_batch in range(batch_size):
            labelset = labels[batch_start + image_in_batch : batch_start + image_in_batch + 1]
            increment = int(np.argmax(layer_2[image_in_batch : image_in_batch + 1]) == np.argmax(labelset))
            correct_cnt += increment

        # calculate deltas
        layer_2_delta = (labels[batch_start : batch_end] - layer_2) / (batch_size * layer_2.shape[0])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * tanh_deriv(layer_1)
        layer_1_delta *= dropout_mask

        # backpropagate
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta)
        layer_1_derivative_reshape = layer_1_delta.reshape(kernel_output.shape)
        k_update = flattened_input.T.dot(layer_1_derivative_reshape)
        kernel_weights -= alpha * k_update

    test_correct_cnt = 0

    # run test set
    for image in range(len(test_images)):
            layer_0 = test_images[image:image+1]
            layer_0 = layer_0.reshape(layer_0.shape[0], 28, 28)

            kernel_output, expanded_shape, flattened_input = apply_convelution(layer_0)
            layer_1 = tanh(kernel_output.reshape(expanded_shape[0], -1))
            layer_2 = np.dot(layer_1, weights_1_2)
            test_correct_cnt += int(np.argmax(layer_2) == np.argmax(test_labels[image:image+1]))
    if(iteration % 1 == 0):
        printAccuracy(test_correct_cnt, correct_cnt, iteration)


I: 0 Test-Acc: 0.0288 Train-Acc 0.055
I: 1 Test-Acc: 0.0273 Train-Acc 0.037
I: 2 Test-Acc: 0.028 Train-Acc 0.037

KeyboardInterrupt: 