In [1]:
import numpy as np
import scipy.special
import matplotlib.pyplot
%matplotlib inline

In [2]:
class NeuralNetwork:
    
    def __init__(self, inputnodes, hiddennodes, outputnodes, numlayers, learningrate):
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        
        self.hls = numlayers
        
        self.lr = learningrate
        
        self.weights = [np.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))]
        if self.hls-3 > 0:
            for i in range(self.hls-3):
                self.weights.append(np.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.hnodes)))
        self.weights.append(np.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes)))
        
        self.activiation_function = lambda x: scipy.special.expit(x)
        self.d_activation_function = lambda x: self.activiation_function(x) * (1-self.activiation_function(x))
    
    def train(self, inputs_list, targets_list):
        targets = np.array(targets_list, ndmin = 2).T
        inputs = np.array(inputs_list, ndmin = 2).T
        
        iter_inputs = inputs.copy()
        layer_values = []
        
        for w in self.weights:
            iter_inputs = self.activiation_function(np.dot(w, iter_inputs))
            layer_values.append(iter_inputs)
        #print(inputs)
        
        '''error = [targets - layer_values[-1]]
        for i in range(len(self.weights)-2, 0, -1):
            error.append(np.dot(self.weights[i], error))'''
        def get_errors(error_list, prev_error, i):
            if i < -len(self.weights):
                return error_list
            error = np.dot(self.weights[i].T, prev_error)
            error_list.append(error)
            return get_errors(error_list, error, i-1)
        errors = get_errors([targets - layer_values[-1]], targets - layer_values[-1], -1)
        errors = errors[::-1]
        for error in errors:
            #print(np.array(error).shape)
            pass
        for i in range(-1, -len(self.weights), -1):
            #print(i)
            #print(f"weights: {self.weights[i].shape}, layer i: {layer_values[i].shape}, layer i-1: {layer_values[i-1].shape}")
            #print(f"error: {errors[i].shape}")
            self.weights[i] += self.lr * np.dot((errors[i] * self.d_activation_function(layer_values[i])), layer_values[i-1].T)
        #print(f"weights: {self.weights[0].shape}, layer 0: {layer_values[0].shape}, inputs: {inputs.shape}")
        #print(f"error: {errors[1].shape}")
        self.weights[0] += self.lr * np.dot((errors[1] * self.d_activation_function(layer_values[0])), inputs.T)
            
        '''error = None
        for layer_num in range(len(self.weights)-1, 0, -1):
            print(layer_num)
            if error is None:
                error = targets - layer_values[-1]
            self.weights[layer_num] += self.lr * np.dot((error * self.d_activation_function(layer_values[layer_num])), layer_values[layer_num-1].T)
            #print(self.lr * np.dot((error * self.d_activation_function(layer_values[layer_num])), layer_values[layer_num-1].T))
            error = np.dot(self.weights[layer_num].T, error)
            print(error.shape)
        self.weights[0] += self.lr * np.dot((error * self.d_activation_function(layer_values[0])), inputs.T)'''
        
        '''l2_error=targets - layer_values[2]
        l2_delta=l2_error*self.d_activation_function(layer_values[2])
        self.weights[2]+=(self.lr*np.dot(l2_delta, layer_values[1].T))
        l1_error=np.dot(l2_delta, self.weights[1].T)
        l1_delta=l1_error*self.d_activation_function(layer_values[1])
        self.weights[1]+=(self.lr*np.dot(layer_values[0].T, l1_delta))
        inl_error=np.dot(l1_delta, self.weights[0].T)
        inl_delta=inl_error*self.d_activation_function(layer_values[0])
        self.weights[0]+=(self.lr*np.dot(inputs.T, inl_delta))'''
        
        '''error = targets - layer_values[-1]
        print(error)
        prev_delta = error * self.d_activation_function(layer_values[-1])
        self.weights[-1] += self.lr*np.dot(layer_values[-1].T, prev_delta)
        for layer in range(len(self.weights)-2, 0, -1):
            print(f"layer = {layer}")
            print(f"prev_delta shape = {prev_delta.shape}, layer_values[layer].shape = {layer_values[layer].shape}")
            print(f"weights[layer].shape = {self.weights[layer].shape}")
            error = np.dot(prev_delta, self.weights[layer].T)
            print(f"error.shape = {error.shape}, layer_values[layer].shape = {layer_values[layer].shape}")
            prev_delta = error*self.d_activation_function(layer_values[layer].T)
            self.weights[layer] += self.lr*np.dot(prev_delta, layer_values[layer])
        self.weights[0] += self.lr*np.dot(inputs, prev_delta)'''
        
    
    def query(self, inputs_list):
        inputs = np.array(inputs_list, ndmin = 2).T
        
        for w in self.weights:
            inputs = self.activiation_function(np.dot(w, inputs))
        
        '''hidden_inputs = np.dot(self.wih, inputs)
        hidden_outputs = self.activiation_function(hidden_inputs)
        final_inputs = np.dot(self.who, hidden_outputs)
        final_outputs = self.activiation_function(final_inputs)'''
        
        return inputs

In [7]:
input_nodes = 784
hidden_nodes = 500
output_nodes = 10
num_layers = 5
learning_rate = 0.1

nn = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, num_layers, learning_rate)

data_file = open("mnist_train.csv", "r")
training_list = data_file.readlines()
data_file.close()

test_file = open("mnist_test.csv", 'r')
test_list = test_file.readlines()
test_file.close()

In [8]:
#train
for i in range(50):
    for image in training_list:
        all_values = image.split(",")
        inputs = np.asfarray(all_values[1:]) / 255 * 0.99 + 0.01
        targets = np.zeros(output_nodes) + 0.01
        targets[int(all_values[0])] = 0.99
        nn.train(inputs, targets)

    #test
    scorecard = []
    for test_image in test_list:
        all_values = test_image.split(",")
        expected = int(all_values[0])
        received = int(np.argmax(nn.query(np.asfarray(all_values[1:]) / 255 * 0.99 + 0.01)))
        scorecard.append(expected == received)

    #print(scorecard)
    print(f"score = {sum(scorecard)/len(scorecard)} for epoch {i}")

score = 0.7631 for epoch 0
score = 0.5299 for epoch 1
score = 0.3919 for epoch 2
score = 0.5129 for epoch 3
score = 0.6396 for epoch 4
score = 0.6731 for epoch 5
score = 0.6957 for epoch 6
score = 0.7038 for epoch 7
score = 0.7204 for epoch 8
score = 0.7202 for epoch 9
score = 0.7406 for epoch 10
score = 0.7572 for epoch 11
score = 0.7605 for epoch 12
score = 0.7665 for epoch 13
score = 0.7659 for epoch 14
score = 0.7804 for epoch 15
score = 0.7794 for epoch 16
score = 0.7811 for epoch 17
score = 0.7934 for epoch 18
score = 0.7814 for epoch 19
score = 0.7989 for epoch 20
score = 0.7938 for epoch 21
score = 0.7948 for epoch 22
score = 0.7868 for epoch 23
score = 0.7953 for epoch 24
score = 0.7835 for epoch 25
score = 0.7792 for epoch 26
score = 0.7894 for epoch 27
score = 0.7878 for epoch 28
score = 0.7918 for epoch 29
score = 0.7952 for epoch 30
score = 0.8012 for epoch 31
score = 0.8018 for epoch 32
score = 0.8026 for epoch 33
score = 0.8005 for epoch 34
score = 0.7821 for epoch 35
sc