In [1]:
import csv
import warnings
warnings.filterwarnings('ignore')

import math
import matplotlib.pyplot as plt

import numpy as np
formatter = {'float': '{: 0.2f}'.format, 'int': '{: 0.2f}'.format}
formatter8 = {'float': '{: 0.8f}'.format, 'int': '{: 0.2f}'.format}

import time
import datetime
import random

In [2]:
def get_data(file_name):
    data_points = []
    data_labels = []
    with open(file_name, 'r') as f:
        reader = csv.reader(f)



        for row in list(reader)[1:]:
            label = [0 for _ in range(0, 10)]
            label[int(row[0])] = 1
            data_points.append([int(x) for x in row[1: ]])
            data_labels.append(label)

    print(file_name, ' : Data Points: ', len(data_points), ' x ', len(data_points[0]), ' : Data Labels: ', len(data_labels), ' x ', len(data_labels[0]))
    return data_points, data_labels


training_data_points, training_data_labels = get_data('mnist_train.csv')
testing_data_points, testing_data_labels = get_data('mnist_test.csv')

mnist_train.csv  : Data Points:  60000  x  784  : Data Labels:  60000  x  10
mnist_test.csv  : Data Points:  10000  x  784  : Data Labels:  10000  x  10


In [3]:
def sigmoid(x):
    return 1/ (1 + np.exp(-x))

def d_sigmoid(x):
    s = sigmoid(x)
    return  s * (1 - s)

In [134]:
"""
All Comments will use the following Example:
MNIST Number Recognition Project
Number of Cases - 60,000
Input Layer - 784 Nodes
Hidden Layer - 16 Nodes
Output Layer - 10 Nodes
"""

class NeuralNetwork_v2:

    """
    all_heights       - array    - heights of each layer - [784, 16, 10]
    num_layers        - int      - 3
    learning_rate     - int      - factor to adjust weights and biases
    activation_func   - function - limiter from 0 to 1
    d_activation_func - function - derivative of activation
    weights           - array    - weights of each layer, which are matrices [W_{layer0}, W_{layer1}]
                                 - W_{layer0} = Matrix_{16 x 784}, W_{layer1} = Matrix_{10 x 16}
    biases            - array    - biases of each layer, which are matrices [b_{layer0}, b_{layer1}]
                                 - b_{layer0} = Matrix_{16 x 784}, b_{layer1} = Matrix_{10 x 16}
    """
    def __init__(self, all_heights, learning_rate = 0.05, activation_func = sigmoid, d_activation_func = d_sigmoid, debug = False):
        self.all_heights = all_heights
        self.num_layers = len(all_heights)
        self.learning_rate = learning_rate
        self.activation_func = activation_func
        self.d_activation_func = d_activation_func

        self.weights = [None for _ in range(0, self.num_layers - 1)]
        self.biases = [None for _ in range(0, self.num_layers - 1)]
        for height_index in range(0, self.num_layers - 1):
            # next_layer_height x prev_layer_height
            self.weights[height_index] = np.random.randn(self.all_heights[height_index + 1], self.all_heights[height_index])
            # next_layer_height x 1
            self.biases[height_index]  = np.random.randn(self.all_heights[height_index + 1], 1)
        self.weights = np.array(self.weights)
        self.biases = np.array(self.biases)

        if debug:
            print(self)


    """
    num_cases - int - number of training cases
    training_cases - array - each case which contains an array of layers, and its desired output
                      - layer - array that contains the values at each layer [784 values, 16 values, 10 values]
                      - desired output - array that contains the desired output values [10 values]
    input_cases - array 2D - each row is one case of training data
    desired_output_cases - each row is one desired_output of each case of training data

    """
    def init_all_layers(self, input_cases, desired_output_cases, debug = False):
        self.num_cases = len(input_cases)
        self.training_cases = [None for _ in range(0, self.num_cases)]

        # Each Case, initialize each case's input layer then forward prop it
        for case_index in range(0, self.num_cases):
            input_layer = np.array([input_cases[case_index]]).T
            correct_output = np.array([desired_output_cases[case_index]]).T
            self.training_cases[case_index] = {'Layers' : [None for _ in range(0, self.num_layers)],
                                               'Desired_Output' : correct_output}

            self.training_cases[case_index]['Layers'][0] = input_layer
            self.training_cases = self.forward_prop(self.training_cases, case_index, debug)

        if debug:
            num_cases_str = 'Num Cases: ' + str(self.num_cases) + '\n'

            first_layer = self.training_cases[0]['Layers']
            layer_str = 'Layers: '
            for layer_index in range(0, len(first_layer) - 1):
                layer_str += str(first_layer[layer_index].shape) + ', '
            layer_str += str(first_layer[len(first_layer) - 1].shape) + '\n'

            output_str = 'Desired_Output: ' + str(self.training_cases[0]['Desired_Output'].shape) + '\n'

            print(num_cases_str + layer_str + output_str)


    def train(self, input_cases, correct_output_cases, iterations, debug = False):
        self.init_all_layers(input_cases, correct_output_cases, debug)
        if debug:
            print('0 : ', self.calculate_error(False))
        for i in range(0, iterations):
            self.update_NN(False)
            if debug:
                print(i + 1, ' : ', self.calculate_error(False))


    def forward_prop(self, cases, case_index, debug = False):
        layers = cases[case_index]['Layers']
        for layer_index in range(0, len(self.weights)):
            layers[layer_index + 1] = self.activation_func(self.weights[layer_index].dot(layers[layer_index]) + self.biases[layer_index])
        cases[case_index]['Layers'] = layers

        return cases


    def backward_prop(self, case_index, debug = False):
        curr_case = self.training_cases[case_index]
        curr_case_layers = curr_case['Layers']
        curr_case_desired_output = curr_case['Desired_Output']

        d_weights = [None for _ in range(0, len(self.weights))]
        d_biases = [None for _ in range(0, len(self.biases))]

        d_outer = 2 * (curr_case_layers[self.num_layers - 1] - curr_case_desired_output)

        d_activations = []
        for layer_index in range(self.num_layers - 2, -1, -1):
            d_activations.append(self.d_activation_func(self.weights[layer_index].dot(curr_case_layers[layer_index]) + self.biases[layer_index]))

        for layer_index in range(self.num_layers - 2, -1, -1):
            weights_v_layers = []
            for dec_layer_index in range(self.num_layers - 2, layer_index, -1):
                weights_v_layers.append(self.weights[dec_layer_index])
            weights_v_layers.append(curr_case_layers[layer_index])

            d_weight = d_outer
            d_bias = 0
            for weights_v_layers_index in range(0, len(weights_v_layers)):
                d_weight *= d_activations[weights_v_layers_index]
                if weights_v_layers_index == len(weights_v_layers) - 1:
                    d_bias = d_weight
                    d_weight = d_weight.dot(weights_v_layers[weights_v_layers_index].T)
                else:
                    d_weight = weights_v_layers[weights_v_layers_index].T.dot(d_weight)

            d_weights[layer_index] = self.learning_rate * d_weight
            d_biases[layer_index] = self.learning_rate * d_bias

        return d_weights, d_biases

    def update_NN(self, debug = False):
        d_weights, d_biases = self.backward_prop(0, debug)
        average_quotient = 1 / self.num_cases
        for case_index in range(1, self.num_cases):
            add_weights, add_biases = self.backward_prop(case_index, debug)
            for layer in range(0, len(add_weights)):
                d_weights[layer] += average_quotient * add_weights[layer]
                d_biases[layer] += average_quotient * add_biases[layer]

        for layer in range(0, len(add_weights)):
            self.weights[layer] -= d_weights[layer]
            self.biases[layer] -= d_biases[layer]


        correct = 0
        for case_index in range(0, self.num_cases):
            self.training_cases = self.forward_prop(self.training_cases, case_index, debug)
            if np.argmax(self.training_cases[case_index]['Layers'][self.num_layers - 1]) == np.argmax(self.training_cases[case_index]['Desired_Output']):
                correct += 1
        print(correct, ' / ', self.num_cases, ' - ', 100 * correct / self.num_cases, '%')



    """
    return sum( (calculated_output - desired_output)^2 )
    """
    def calculate_error_per_case(self, case_index, debug = False):
        calculated_output = self.training_cases[case_index]['Layers'][self.num_layers - 1]
        desired_output = self.training_cases[case_index]['Desired_Output']
        diff = calculated_output - desired_output
        diff_sq = diff ** 2
        error = np.sum(diff_sq)

        if debug:
            print('Calculated Output: \n' , calculated_output)
            print('Desired Output: \n' , desired_output)
            print('Error: ', error)

        return error


    def calculate_error(self, debug = False):
        error = 0
        for case_index in range(0, self.num_cases):
            error += self.calculate_error_per_case(case_index, debug)
        return error / self.num_cases


    def test(self, input_cases, correct_output_cases, testing_indices, debug = False):
        num_cases = len(input_cases)
        testing_cases = [None for _ in range(0, num_cases)]
        for case_index in range(0, num_cases):
            input_layer = np.array([input_cases[case_index]]).T
            correct_output = np.array([correct_output_cases[case_index]]).T
            testing_cases[case_index] = {'Layers' : [None for _ in range(0, self.num_layers)],
                                         'Desired_Output' : correct_output}
            testing_cases[case_index]['Layers'][0] = input_layer

        for test_index in testing_indices:
            testing_cases = self.forward_prop(testing_cases, test_index)
            print('Desired: ', np.argmax(testing_cases[test_index]['Desired_Output']), '\n',
                  'Calculation: ', np.argmax(testing_cases[test_index]['Layers'][self.num_layers - 1]))

    def __str__(self):

        all_heights_str = 'all_heights: ' + str(self.all_heights) + '\n'
        learning_rate_str = 'learning_rate: ' + str(self.learning_rate) + '\n'

        weight_str = 'weights: '
        bias_str = 'biases: '
        for layer_index in range(0, len(self.weights) - 1):
            weight_str += str(self.weights[layer_index].shape) + ', '
            bias_str += str(self.biases[layer_index].shape)    + ', '
        weight_str += str(self.weights[len(self.weights) - 1].shape) + '\n'
        bias_str += str(self.biases[len(self.weights) - 1].shape)    + '\n'

        return all_heights_str + learning_rate_str + weight_str + bias_str


NN_v2 = NeuralNetwork_v2([784, 16, 10], learning_rate=1, debug=True)
NN_v2.train(training_data_points[0 : 10], training_data_labels[0 : 10], 5, debug=True)
NN_v2.test(testing_data_points, testing_data_labels, [1], debug=False)

all_heights: [784, 16, 10]
learning_rate: 1
weights: (16, 784), (10, 16)
biases: (16, 1), (10, 1)

Num Cases: 10
Layers: (784, 1), (16, 1), (10, 1)
Desired_Output: (10, 1)

0 :  5.404121199044741
0  /  10  -  0.0 %
1  :  4.036124954504273
0  /  10  -  0.0 %
2  :  3.2033503326821218
0  /  10  -  0.0 %
3  :  2.490385898116353
1  /  10  -  10.0 %
4  :  2.2129747886737983
2  /  10  -  20.0 %
5  :  1.9646526356660037
Desired:  2 
 Calculation:  5
