In [None]:
# MNIST Dataset
# https://deepai.org/dataset/mnist

import matplotlib.pyplot as plt
import numpy as np
from random import randint
import copy

In [None]:
# Stream the MNIST training set (IDX files) through the network in mini batches.
# NOTE: `NN` is defined in a later cell of this notebook, so that cell has to be run first.
# The files are opened in binary mode because IDX files contain raw bytes, not text.
with open("mnist/train-images.idx3-ubyte", "rb") as images,\
    open("mnist/train-labels.idx1-ubyte", "rb") as labels:

    # --- Header (images): four big-endian 32-bit integers
    print('▶ Processing header')
    header_images = np.fromfile(images, dtype='>i4', count=4)

    magic_number = header_images[0]
    img_count = header_images[1]
    row_count = header_images[2]
    col_count = header_images[3]
    pixel_count = int(row_count) * int(col_count)

    print(f"#images\t\t {img_count}")
    print(f"#rows\t\t {row_count}")
    print(f"#cols\t\t {col_count}")
    print(f"#pixels\t\t {pixel_count} ({row_count}*{col_count})")


    # --- Header (labels): two big-endian 32-bit integers
    header_labels = np.fromfile(labels, dtype='>i4', count=2)
    magic_number = header_labels[0]
    label_count = header_labels[1]

    print(f"#labels\t\t {label_count}")


    # --- Prepare for training & start
    batch_size = 10
    print()
    print('▶ Processing images')
    model = NN([pixel_count, 16, 16, 10], batch_size, 0.1)

    # for i in range(label_count // batch_size): # whole training set
    for i in range(2): # no. of total batches
        # --- Process one batch
        print(f'▶▶▶ Processing {i+1}. batch')
        for j in range(batch_size):
            # Both files are read strictly sequentially: every np.fromfile call
            # continues where the previous one stopped. Passing an additional
            # `offset` (which is relative to the current position) would skip samples.

            # Image
            img = np.fromfile(images, dtype=np.ubyte, count=pixel_count)

            # Label
            label_raw = np.fromfile(labels, dtype=np.ubyte, count=1)

            # A short read means one of the files ended early
            if img.size != pixel_count or label_raw.size != 1:
                raise EOFError(
                    "Ran out of MNIST samples before all requested batches were read")
            label = label_raw[0]

            plt.imshow(img.reshape((row_count, col_count)), cmap="gray")
            print(f"Processing image with label {label}")

            plt.show()

            model.train(img, label)

In [330]:
# --- Our model

# First layer:      784 neurons (pixels of our image with gray-scale values from 0..255)
# Hidden Layer 1:   16 neurons
# Hidden Layer 2:   16 neurons
# Output Layer:     10 neurons (representing the digits 0..9)

# Layer1 ------------------- Layer2 ------------------- Layer3 ------------------- Layer4
# 784 (28*28)                  16                        16                   10 (digits 0-9)              <- #neurons
# input layer               hidden layer 1          hidden layer 2              output layer

#           784*16 weights                16*16 weights           16*10 weights                            <- #weights
#               16 biases                    16 biases               10 biases                             <- #biases
#          weights_2, biases_2         weights_3, biases_3      weights_4, biases_4

class NN:
    """
    Small fully connected feed-forward network trained with mini-batch gradient descent.

    Every layer uses ReLU as activation. The cost of a single sample is the squared
    error between the output layer and the one-hot encoded label.

    Index 0 of `neurons_z`, `weights`, `biases`, `weights_desired_changes` and
    `biases_desired_changes` is an empty placeholder, so that index `l` always refers
    to layer `l` (the input layer has no incoming weights or biases).
    """

    def __init__(self, layers, batch_size, step_size):
        """
        Inits the neural network.

        Parameters:
            layers: [ layer1_n, layer2_n, layer3_n, ... ] number of neurons per layer
            batch_size: number of train() calls after which one gradient step is done
            step_size: factor applied to the averaged gradient (learning rate)

        Raises:
            TypeError: if fewer than two layers are given
        """
        # Set metadata
        if not len(layers) >= 2:
            raise TypeError(
                "Specify at least two layers for the neural network")
        self.batch_size = batch_size
        self.count = 0
        self.cost = 0
        self.step_size = step_size

        # Init neurons (values are overwritten by the first feed forward)
        self.neurons_z = [np.empty(0)] + [np.empty(n) for n in layers[1:]]
        self.neurons = [np.empty(n) for n in layers]

        # Init weights & biases; weights[l] has shape (neurons in l, neurons in l-1)
        self.weights = [np.empty(0)] + [np.random.rand(n_out, n_in)
                                        for n_in, n_out in zip(layers[:-1], layers[1:])]
        self.biases = [np.empty(0)] + [np.random.rand(n) for n in layers[1:]]

        # Init gradient accumulators for backpropagation.
        # They are summed up with += and therefore have to start at zero.
        self.weights_desired_changes = [np.empty(0)] + [
            np.zeros((n_out, n_in)) for n_in, n_out in zip(layers[:-1], layers[1:])]
        self.biases_desired_changes = [
            np.empty(0)] + [np.zeros(n) for n in layers[1:]]

    def train(self, input, desired):
        """
        Trains the network with one sample.

        The gradient of the sample is accumulated; after `batch_size` samples the
        average cost is printed and one gradient step is performed.

        Parameters:
            input: ndarray with as many entries as the input layer has neurons
            desired: index of the output neuron that should fire (the label)

        Raises:
            ValueError: if `desired` is not a valid index of the output layer
            TypeError: if `input` is rejected by _feed_forward()
        """
        output_size = len(self.neurons[-1])
        if not 0 <= desired < output_size:
            raise ValueError(
                f"Desired value is {desired} but must be at least 0 and smaller than {output_size}")

        # Feed forward
        output = self._feed_forward(input)

        # One-hot encode the label *before* propagating back,
        # the gradient needs the target vector and not the class index
        target = np.zeros(output_size)
        target[desired] = 1.0

        # Propagate back
        self._propagate_back(output, target)

        # Calc cost
        self.cost += np.sum((output - target)**2)

        # Learn after <batch_size> trainings
        self.count += 1
        if self.count == self.batch_size:
            # Get & reset cost
            self.cost /= self.batch_size  # average
            print(f"C={self.cost}")
            self.cost = 0

            # Learn
            self._learn()

            # Reset batch
            self.count = 0

    def _feed_forward(self, input):
        """Feed through neural network. Returns the output neurons"""
        # Checks
        if not isinstance(input, np.ndarray):
            raise TypeError(f"Input must be a ndarray")
        if not input.size == len(self.neurons[0]):
            raise TypeError(
                f"Input contains {input.size} entries, however the input layer is expecting {len(self.neurons[0])} entries")

        # Set input layer (flattened, so that e.g. a 2-D image of the right size works too)
        self.neurons[0] = input.reshape(-1)

        # Feed forward through all layers, start with layer l=1
        for l in range(1, len(self.neurons)):
            self.neurons_z[l] = self.weights[l] @ self.neurons[l - 1] + self.biases[l]
            self.neurons[l] = self._activation(self.neurons_z[l])

        # Return neurons of output layer
        return self.neurons[-1]

    def _propagate_back(self, output, desired):
        """
        Propagates back the results and adds the gradient of this sample to the
        desired changes of weights and biases.
        Note that we do not make any changes to weights or biases at this point yet (this is done in _learn()).

        Parameters:
            output: activations of the output layer (as returned by _feed_forward())
            desired: target vector with the same length as `output`
        """
        last = len(self.neurons) - 1
        delC_delZ_next = None  # dC/dz of layer l+1, set at the end of every iteration

        for l in range(last, 0, -1):  # from the output layer down to layer 1
            if l == last:
                # Derivative of the squared error w.r.t. the output activations
                delC_delActivation = 2 * (output - desired)
            else:
                # Chain rule: every neuron j of layer l influences all neurons of layer l+1
                delC_delActivation = self.weights[l + 1].T @ delC_delZ_next

            delC_delZ = delC_delActivation * \
                self._v_derive_activation(self.neurons_z[l])

            # dz/dw is the activation of the previous layer, dz/db is 1
            self.weights_desired_changes[l] += np.outer(
                delC_delZ, self.neurons[l - 1])
            self.biases_desired_changes[l] += delC_delZ

            delC_delZ_next = delC_delZ

    def _learn(self):
        """Do a gradient step after having gone through a mini batch"""
        # average over the mini batch, scaled by the step size
        scale = self.step_size / self.batch_size

        for l in range(1, len(self.weights)):
            # Step into the direction of the negative gradient
            self.weights[l] -= scale * self.weights_desired_changes[l]
            self.biases[l] -= scale * self.biases_desired_changes[l]

            # Start the next mini batch with empty accumulators
            self.weights_desired_changes[l].fill(0)
            self.biases_desired_changes[l].fill(0)

    def _activation(self, vector):
        """Applies the activation function (ReLU) to every entry of the vector"""
        return np.maximum(0.0, vector)

    def _derive_activation(self, x):
        """Calculates the derivative of the activation function"""
        if x > 0:
            return 1
        else:
            return 0

    def _v_derive_activation(self, x):
        """Vectorized version of _derive_activation"""
        return np.where(np.asarray(x) > 0, 1, 0)


# --- Test with sample model
# model = NN([16, 32])

# print(len(model.neurons_z))
# print(model.neurons_z[0].shape)
# print(model.neurons_z[1].shape)

# print(len(model.neurons))
# print(model.neurons[0].shape)
# print(model.neurons[1].shape)

# print(len(model.weights))
# print(model.weights[0].shape)
# print(model.weights[1].shape)

# print(len(model.biases))
# print(model.biases[0].shape)
# print(model.biases[1].shape)

# model = NN([784, 16, 16, 10], 10, 0.1)
# img_rand = np.random.randint(0, 256, size=(784,))
# for i in range(10):
#     model.train(img_rand, 2)


In [None]:
# Utility

def normalize(x):
    """
    Normalizes the given array by dividing it by its largest entry.

    Works for arrays of any dimension (e.g. a flat pixel vector or a 2-D image).

    Parameters:
        x: array-like of numbers

    Returns:
        ndarray with the same shape as `x`. If `x` is empty, an empty float array
        is returned. If the largest entry is 0 (e.g. a completely black image),
        an array of zeros is returned instead of dividing by zero.
    """
    values = np.asarray(x)
    if values.size == 0:
        return values.astype(float)

    peak = values.max()
    if peak == 0:
        # avoid 0/0 -> NaN, which would poison every later computation
        return np.zeros(values.shape, dtype=float)

    return values / peak

def relu(x):
    """Rectified linear unit for a single number: `x` if it is positive, otherwise 0.0."""
    if x > 0.0:
        return x
    return 0.0

def sigmoid(x):
    """
    Sigmoid of a single number, computed in a numerically stable way.

    The exponential is always taken of a non-positive value, so it cannot overflow.
    """
    if x >= 0:
        return 1 / (1 + np.exp(-x))

    # x < 0: exp(x) is small and the denominator 1 + exp(x) cannot become zero
    exp_x = np.exp(x)
    return exp_x / (1 + exp_x)

def encode_one_hot(x, size):
    """
    One-hot encodes the index `x`.

    Returns row `x` of the `size` x `size` identity matrix, i.e. a float vector of
    length `size` that is 1 at position `x` and 0 everywhere else.
    """
    identity = np.identity(size)
    return identity[x]
