In [325]:
import numpy as np
from mnist_loader import load_data, load_data_wrapper, vectorized_result

# Neural Network class

In [326]:
class NeuralNetwork:
    """Fully connected feed-forward classifier trained by mini-batch gradient descent.

    Hidden layers use the sigmoid activation and the output layer uses softmax.
    Each weight matrix has one extra leading row that multiplies a constant
    column of ones (the bias), so layer inputs are always prefixed with that
    column before the matrix product.

    Parameters
    ----------
    weight_sizes : sequence of int
        Units per layer, input layer first and output layer last,
        e.g. ``[784, 200, 80, 10]``.
    epochs : int
        Number of passes over the training set performed by ``train``.
    c : float
        Learning rate multiplied with the batch-averaged gradient.
    batch : int
        Mini-batch size. A falsy value (``0`` or ``None``) falls back to 100.
    seed : int or None
        When given, weights are drawn from a dedicated
        ``np.random.RandomState(seed)`` so initialisation is reproducible.
        When ``None`` (default) NumPy's global random state is used.

    Raises
    ------
    ValueError
        If the resolved batch size is not a positive integer.
    """

    def __init__(self, weight_sizes, epochs=10, c=0.1, batch=100, seed=None):
        self.weight_sizes = weight_sizes
        self.epochs = int(epochs)
        self.c = c
        self.batch = int(batch) if batch else 100
        if self.batch <= 0:
            # A non-positive step would make the batching loop in train() do
            # nothing (or fail), so reject it up front.
            raise ValueError(f"batch must be a positive integer, got {batch!r}")
        self.seed = seed
        self.layers = len(self.weight_sizes)
        self.weights = self.initialize_weights()

    def prepare_data(self, data):
        """Add the bias column to the inputs and one-hot encode the labels.

        Parameters
        ----------
        data : tuple
            ``(x_data, y_data)`` where ``x_data`` is a 2-D array with one
            sample per row and ``y_data`` holds one integer class label per
            sample.

        Returns
        -------
        tuple of np.ndarray
            ``(new_x_data, new_y_data)``: the inputs with a leading column of
            ones, and a ``(n_samples, n_outputs)`` one-hot matrix whose width
            is the size of the output layer (``weight_sizes[-1]``).
        """
        x_data, y_data = data
        bias_column = np.ones([x_data.shape[0], 1])
        new_x_data = np.concatenate((bias_column, x_data), axis=1)

        labels = np.asarray(y_data, dtype=np.intp)
        new_y_data = np.zeros((len(labels), self.weight_sizes[-1]))
        # Integer-array indexing sets exactly one entry per row.
        new_y_data[np.arange(len(labels)), labels] = 1
        return new_x_data, new_y_data

    def initialize_weights(self):
        """Return one random weight matrix per pair of consecutive layers.

        Matrix ``k`` has shape ``(weight_sizes[k] + 1, weight_sizes[k + 1])``;
        the extra row holds the bias weights. Entries are standard-normal
        samples divided by 10.
        """
        rng = np.random if self.seed is None else np.random.RandomState(self.seed)
        return [
            rng.randn(n_in + 1, n_out) / 10
            for n_in, n_out in zip(self.weight_sizes[:-1], self.weight_sizes[1:])
        ]

    def sigmoid(self, x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return (1 / (1 + np.exp(-x)))

    def sigmoid_derivative(self, x):
        """Element-wise derivative of the logistic function evaluated at ``x``."""
        sig_val = self.sigmoid(x)
        return np.multiply(sig_val, (1 - sig_val))

    def activation_func(self, x, layer):
        """Apply the activation for a layer to the pre-activations ``x``.

        ``layer`` is 1-based: when it equals ``self.layers`` (the output
        layer) a row-wise softmax is applied, otherwise the sigmoid.
        """
        if layer == self.layers:
            # Subtract the row maximum before exponentiating so exp() cannot overflow.
            e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
            return e_x / e_x.sum(axis=1, keepdims=True)
        return self.sigmoid(x)

    def forward_pass(self, X):
        """Propagate a batch through the network.

        Parameters
        ----------
        X : np.ndarray
            Inputs with the bias column already prepended (as produced by
            ``prepare_data``).

        Returns
        -------
        tuple of list
            ``(nets, activations)``, both of length ``self.layers``.
            ``nets[k]`` is the pre-activation of layer ``k`` (index 0 is an
            unused ``0.0`` placeholder). ``activations[0]`` is ``X``, hidden
            entries carry a leading bias column, and the last entry is the
            softmax output without a bias column.
        """
        output_index = self.layers - 1
        nets = [0.0] * self.layers
        activations = [X] + [None] * output_index
        bias_column = np.ones([X.shape[0], 1])

        for layer in range(1, self.layers):
            nets[layer] = activations[layer - 1] @ self.weights[layer - 1]
            layer_output = self.activation_func(nets[layer], layer + 1)
            if layer == output_index:
                # The output layer feeds nothing further, so it needs no bias column.
                activations[layer] = layer_output
            else:
                activations[layer] = np.concatenate((bias_column, layer_output), axis=1)

        return nets, activations

    def back_propagation(self, X, Y):
        """Compute one gradient-descent step for a batch.

        The current weights are not modified; the caller decides whether to
        adopt the returned ones.

        Parameters
        ----------
        X : np.ndarray
            Batch inputs including the bias column.
        Y : array-like
            One-hot targets, one row per sample.

        Returns
        -------
        list of np.ndarray
            Updated weight matrices, same shapes as ``self.weights``.
        """
        nets, activations = self.forward_pass(X)
        output_index = self.layers - 1

        deltas = [None] * self.layers
        # Output error is simply (prediction - target).
        deltas[output_index] = activations[output_index] - Y
        for i in range(output_index - 1, 0, -1):
            # Skip the bias row of the weights: the bias unit receives no error signal.
            deltas[i] = (deltas[i + 1] @ self.weights[i][1:].T) * self.sigmoid_derivative(nets[i])

        batch_size = X.shape[0]
        gradients = [(deltas[i + 1].T @ activations[i]) / batch_size for i in range(output_index)]
        return [weight - self.c * gradient.T for weight, gradient in zip(self.weights, gradients)]

    def accuracy(self, X, Y):
        """Return the fraction of samples whose arg-max prediction matches ``Y``.

        ``X`` must include the bias column and ``Y`` must be one-hot encoded.
        """
        _, activations = self.forward_pass(X)
        prediction = np.argmax(activations[-1], axis=1)
        real_value = np.argmax(Y, axis=1)
        return np.mean(prediction == real_value)

    def train(self, training_data, test_data):
        """Train for ``self.epochs`` epochs, printing accuracies after each one.

        Samples are visited in their stored order, in consecutive mini-batches
        of ``self.batch`` rows. The final batch of an epoch may be smaller, so
        every sample contributes even when the dataset size is not a multiple
        of the batch size.

        Parameters
        ----------
        training_data, test_data : tuple
            Raw ``(x_data, y_data)`` pairs as accepted by ``prepare_data``.
        """
        x_train, y_train = self.prepare_data(training_data)
        x_test, y_test = self.prepare_data(test_data)
        n_samples = x_train.shape[0]

        for epoch in range(1, self.epochs + 1):
            print(f"Epoch: {epoch}")

            for start in range(0, n_samples, self.batch):
                stop = start + self.batch
                self.weights = self.back_propagation(x_train[start:stop], y_train[start:stop])

            accTrain = self.accuracy(x_train, y_train)
            accTest = self.accuracy(x_test, y_test)

            print(f"Training data accuracy: {accTrain}")
            print(f"Test data accuracy: {accTest}")

In [327]:
# Load the three dataset splits from the project-local mnist_loader helper.
# NOTE: validation_data is not used by any cell shown in this notebook.
training_data, validation_data, test_data = load_data()

In [330]:
# Layer sizes: 784 inputs, hidden layers of 200 and 80 units, 10 outputs.
# Weights are drawn randomly at construction, so results vary between runs.
NN = NeuralNetwork([784, 200, 80, 10], epochs=10, c=0.1, batch=100)

In [331]:
# Train on the training split; accuracy on the training and test splits is printed after each epoch.
NN.train(training_data, test_data)

Epoch: 1
Training data accuracy: 0.77996
Test data accuracy: 0.7887
Epoch: 2
Training data accuracy: 0.8633
Test data accuracy: 0.8737
Epoch: 3
Training data accuracy: 0.88464
Test data accuracy: 0.8903
Epoch: 4
Training data accuracy: 0.89624
Test data accuracy: 0.9009
Epoch: 5
Training data accuracy: 0.90286
Test data accuracy: 0.9072
Epoch: 6
Training data accuracy: 0.909
Test data accuracy: 0.9111
Epoch: 7
Training data accuracy: 0.9138
Test data accuracy: 0.9159
Epoch: 8
Training data accuracy: 0.9176
Test data accuracy: 0.9195
Epoch: 9
Training data accuracy: 0.92138
Test data accuracy: 0.9236
Epoch: 10
Training data accuracy: 0.925
Test data accuracy: 0.9266
