In [42]:
#The purpose of this notebook is to implement a convolutional network with numpy
#Certain functions are reused from 'MNIST Classifier with Numpy.ipynb'

In [43]:
import numpy as np
from keras.datasets import mnist #Necessary to get MNIST dataset
np.random.seed(1)

In [44]:
(x_train, y_train), (x_test, y_test) = mnist.load_data() #Load into tuples

images, labels = (x_train[0:1000].reshape(1000,28*28) / 255, #take the first 1000 images for training and reshape them into 28,28 images, then scale them between 0 and 1
                y_train[0:1000])

one_hot_labels = np.zeros((len(labels),10)) #Create the label matrices
for i,l in enumerate(labels):
    one_hot_labels[i][l] = 1 #put a 1 in the matrix at the label location
labels = one_hot_labels


print(labels[0:3])

test_images = x_test.reshape(len(x_test),28*28) / 255 #Reshape the test images as well
test_labels = np.zeros((len(y_test),10))
for i,l in enumerate(y_test):
    test_labels[i][l] = 1

def tanh(x):
    return np.tanh(x) #implement the tanh activation function
def tanh2deriv(x):
    return 1 - (x ** 2) #derivative of tanh
def softmax(x):
    temp = np.exp(x)
    return temp / np.sum(temp, axis = 1, keepdims = True)#output / sum

alpha, iterations = (2,300) #Declare learning rate, epochs
pixels_per_image, num_labels = (784,10) #Declare pixels in an image (28*28) and the labels
batch_size = 128 #Number of examples per batch

[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [45]:
#Convolutional network specific information
input_rows = 28 #Size of a single image
input_cols = 28

kernel_rows = 3
kernel_cols = 3 #Size of a convolution. 3x3 subsection
num_kernels = 16

hidden_size = (( input_rows - kernel_rows) * #Declare hidden layer the size of the reduced image after convolution
              (input_cols - kernel_cols)) * num_kernels

kernels = 0.02 * np.random.random( (kernel_rows * kernel_cols, #initialize kernels between the size of the kernel rows and columns, scaled by 0.02
                                   num_kernels) ) - 0.01
weights_1_2 = 0.2 * np.random.random ((hidden_size, #Initialize second layer weights to be the size of the labels, scaled down
                                      num_labels)) - 1


In [46]:
#Functions for iterating over the image
def get_image_section(layer,row_from,row_to,col_from,col_to):
    section = layer[:,row_from:row_to,col_from:col_to] #section the size of the convolution
    return section.reshape(-1,1,row_to-row_from,col_to - col_from)


In [47]:
#np.random.seed(1)
for j in range(iterations): #Train the network
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)): #For each batch
        batch_start, batch_end = ( (i * batch_size),( (i + 1) * batch_size))
        layer_0 = images[batch_start:batch_end] #Input the first batch
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28) #Reshape as a 28,28 picture
        
        sects = list() #Hold each new section of the image
        for row_start in range(layer_0.shape[1] - kernel_rows): #Iterate over the image but stop before the kernel overflows
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0, 
                                         row_start,
                                         row_start + kernel_rows,
                                         col_start, 
                                         col_start + kernel_cols)
                sects.append(sect) #Apend each section of the image to sects
                
        expanded_input = np.concatenate(sects,axis = 1) #Join each array along the column
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0] * es[1],-1) #-1 means infer from the length of the existing array
        
        kernel_output = flattened_input.dot(kernels) #Multiply the flattened input image section by the kernels
        layer_1 = tanh(kernel_output.reshape(es[0],-1)) #Finally, output layer 1 after reshaping the input
        dropout_mask = np.random.randint(2,size = layer_1.shape) #Regularize with dropout
        layer_1 *= dropout_mask * 2 #Amplify remaining weights to get the same output
        layer_2 = softmax(np.dot(layer_1,weights_1_2)) #Get the weighted probability of each image
        
        for k in range(batch_size): #increment over the batch
            label_set = labels[batch_start + k: batch_start + k + 1]
            counter = int(np.argmax(layer_2[k:k+1]) == 
                         np.argmax(label_set)) #compare each image to the correct label
            correct_cnt += counter
        
        #Start backpropagation of error
        layer_2_delta = (labels[batch_start:batch_end] - layer_2) \
                        / (batch_size * layer_2.shape[0]) #The delta is the error in the output layer divided by the number of images * possible outputs
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * \
                        tanh2deriv(layer_1) #backpropagate the errors from layer 2 to layer 1
        weights_1_2 += alpha * layer_1.T.dot(layer_2_delta) #update the weights in layer 2
        l1d_reshape = layer_1_delta.reshape(kernel_output.shape) #get the reshaped layer 1 delta to multiply into the kernel
        k_update = flattened_input.T.dot(l1d_reshape) #Get the delta for the kernels
        kernels -= alpha * k_update #Change the kernels by the given l1 delta
        
    
    #Test accuracy. Seems repetitive, but there are a few differences in reshaping, network shape
    test_correct_cnt = 0

    for i in range(len(test_images)): #Test the network against each test image
        layer_0 = test_images[i:i + 1] #Get the test image
        layer_0 = layer_0.reshape(layer_0.shape[0],28,28) #Reshape into a 28,28 image

        sects = list() #Hold each new section of the image
        for row_start in range(layer_0.shape[1] - kernel_rows): #Iterate over the image but stop before the kernel overflows
            for col_start in range(layer_0.shape[2] - kernel_cols):
                sect = get_image_section(layer_0, 
                                         row_start, 
                                         row_start + kernel_rows,
                                         col_start, 
                                         col_start + kernel_cols)
                sects.append(sect) #Apend each section of the image to sects

        expanded_input = np.concatenate(sects,axis = 1) #Join each array along the column
        es = expanded_input.shape
        flattened_input = expanded_input.reshape(es[0] * es[1],-1) #-1 means infer from the length of the existing array

        kernel_output = flattened_input.dot(kernels) #Multiply the flattened input image section by the kernels
        layer_1 = tanh(kernel_output.reshape(es[0],-1)) #Finally, output layer 1 after reshaping the input
        layer_2 = np.dot(layer_1,weights_1_2) #Get the weighted probability of each image

        test_correct_cnt += int(np.argmax(layer_2) == 
                               np.argmax(test_labels[i:i+1]))

    if(j % 1 == 0):
        print("\n"+ \
         "I:" + str(j) + \
         " Test-Acc:"+str(test_correct_cnt/float(len(test_images)))+\
         " Train-Acc:" + str(correct_cnt/float(len(images))))



I:0 Test-Acc:0.0286 Train-Acc:0.041


KeyboardInterrupt: 