# Digit dataset backpropagation
By Gerrit van de Bunt, 1756708, 2020-2021\
In this short notebook we will analyse the digit dataset and, based on this analysis, configure a Neural Network using our own code to train upon and classify the entries in this dataset.

# Step 0
Import the necessary tools.\
**Note: Due to Jupyter notebook importing issues, we will be using a couple of cells to define all the code in our notebook, which drastically increases its size.**

In [6]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import seaborn as sns
import pandas
import random

In [7]:
# Neuron class. Takes a list of inputs, applies a list of weights to them, adds a bias,
# then applies the sum of that to the activation function and returns an output.

from typing import List, Union

class Neuron:
    """Single neuron: a weighted sum of its inputs plus a bias, passed through an activation function.

    To initialise, takes a list of weights, an activation function (normally Sigmoid().activate()),
    an optional ID and an optional bias. Once initialised, it can be activated by giving a list of
    inputs with exactly as many elements as there are weights.

    Training is two-phase: erroroutput() / errorhidden() only *stage* new weights and a new bias;
    update() applies them. This lets a whole network compute all its errors against the old
    weights before any weight changes.
    """
    def __init__(self, weights: List[Union[int, float]], activation: callable, ID=0, bias: Union[int,float] = 0.0):
        """Initialises the neuron.

        weights:    one weight per expected input (the list is stored as-is, not copied).
        activation: callable applied to (weighted sum + bias).
        ID:         free-form identifier, only used in messages and __str__.
        bias:       constant added to the weighted sum before activation.
        """
        # FUNCTIONAL VARIABLES (Private)
        self.__weights = weights
        self.__activation = activation
        self.__bias = bias

        # Staged values written by erroroutput()/errorhidden() and applied by update().
        self.__newweights = []
        self.__newbias = 0

        # LOGGING VARIABLES (Public)
        self.ID = ID  # Identifier for this neuron, for debugging.

        self.error = 0  # Error calculated by the error methods.

        self.hasrun = False  # Whether the latest activation succeeded.
        self.input = []  # Inputs of previous activations (one entry per activation)
        self.output = []  # Outputs of previous activations (one entry per activation)

    def getweights(self) -> List[Union[int, float]]:
        """Returns the current weights."""
        return self.__weights

    def setweights(self,weights: List[Union[int, float]]):
        """Replaces the weights of this neuron with the supplied list.

        Raises an Exception when the new list does not have the same number of
        elements as the current weights, so the neuron's input size never changes.
        """
        if not len(weights) == len(self.getweights()):
            raise Exception("Amount of supplied weights does not equal the amount of current weights @ Perceptron {}".format(self.ID))
        self.__weights = weights

    def getactivation(self) -> callable:
        """Returns the current activation function."""
        return self.__activation

    def setactivation(self, func: callable):
        """Changes the activation function on this neuron."""
        self.__activation = func

    def getbias(self) -> Union[int, float]:
        """Returns the current bias for this neuron."""
        return self.__bias

    def setbias(self, b: Union[int, float]):
        """Changes the current bias on this neuron."""
        self.__bias = b

    def activate(self,inputs: List[Union[int, float]]) -> Union[int,float]:
        """Activates the neuron: returns activation(sum(weight * input) + bias).

        The inputs and the resulting output are appended to self.input / self.output,
        so both lists grow by one entry per successful call.
        Raises an Exception when len(inputs) differs from the number of weights.
        """
        # Reset first, so a failed activation is visible as hasrun == False.
        self.hasrun = False
        if not len(inputs) == len(self.__weights):
            raise Exception("Amount of inputs is not equal to the amount of weights @ Perceptron {}".format(self.ID))

        weightedsum = sum(self.__weights[indx] * inputs[indx] for indx in range(len(self.__weights)))
        output = self.__activation(weightedsum + self.__bias)

        # Consider evaluation successful past this point; record the logging variables.
        self.hasrun = True
        self.input.append(inputs)
        self.output.append(output)

        return output

    def __stageupdate(self, error: Union[int, float], learningrate: Union[int, float]) -> None:
        """Stores `error` and stages the gradient-descent step for the latest recorded input."""
        # The output of a node in the previous layer is the input of this node at the same index,
        # so the gradient of weight i is (input i) * error.
        deltaweights = [learningrate * (inp * error) for inp in self.input[-1]]
        deltabias = learningrate * error

        self.error = error
        self.__newweights = [self.__weights[i] - deltaweights[i] for i in range(len(self.getweights()))]
        self.__newbias = self.getbias() - deltabias

    def erroroutput(self, target: Union[int,float], learningrate: Union[int,float]):
        """Calculates the error of an output neuron and stages its new weights and bias.

        Uses the latest recorded output. The factor output * (1 - output) matches the derivative
        of a sigmoid, so this presumably assumes a sigmoid activation.
        Raises an Exception when the latest activation did not succeed.
        """
        if not self.hasrun:
            raise Exception("Run the Neuron first! @ Neuron {}".format(self.ID))
        output = self.output[-1]
        error = output * (1-output) * -(target-output)
        self.__stageupdate(error, learningrate)

    def errorhidden(self, connections: List[Union[int,float]], errors: List[Union[int,float]], learningrate: Union[int,float]):
        """Calculates the error of a hidden layer neuron and stages its new weights and bias.

        connections: the weights that the neurons of the next layer apply to this neuron's output.
        errors:      the errors of those same next-layer neurons, in the same order.
        Raises an Exception when the latest activation did not succeed, or when
        connections and errors differ in length.
        """
        if not self.hasrun:
            raise Exception("Run the Neuron first! @ Neuron {}".format(self.ID))
        if len(connections) != len(errors):
            raise Exception("Amount of connections from this neuron should equal the amount of errors from neurons @ Neuron {}".format(self.ID))
        # Sum of (W(i,j) * delta(j)) over the next layer; accumulated explicitly so the
        # builtin sum() is not shadowed.
        weightederror = 0
        for i in range(len(connections)):
            weightederror += connections[i] * errors[i]
        output = self.output[-1]
        error = output * (1-output) * weightederror
        self.__stageupdate(error, learningrate)

    def update(self):
        """Applies the weights and bias staged by erroroutput() / errorhidden().

        Raises an Exception (from setweights) when the staged weights do not match the
        current number of weights, e.g. because no error method has been called yet.
        """
        # Weights go first: setweights() validates the staged list, so an invalid or
        # missing staged update raises before the bias has been overwritten.
        self.setweights(self.__newweights)
        self.setbias(self.__newbias)

    def __str__(self) -> str:
        """Returns a string representing the object and its variables."""
        activation = self.getactivation()
        # Not every callable has __name__ (functools.partial, callable instances): fall back to repr.
        activationname = getattr(activation, "__name__", repr(activation))

        output = ""
        output += "NEURON ID: {}\n\n".format(self.ID)

        output += "WEIGHTS: {}\n".format(self.getweights())
        output += "ACTIVATION: {}\n".format(activationname)
        output += "BIAS: {}\n".format(self.getbias())

        if self.hasrun:
            output += "SUCCESFUL ACTIVATION \n\n"
            output += "INPUT: {}\n".format(self.input)
            output += "OUTPUT: {}\n".format(self.output)
        else:
            output += "ACTIVATION PENDING/FAILED\n"

        return output

In [8]:
# NeuronLayer defines the layers in the network. This is where the Neuron class is used.

# Layers are fully connected: every neuron in a layer receives the same input list, so all
# neurons within a layer must have the same number of weights, and that number must equal
# the number of outputs (neurons) of the previous layer.

from typing import List, Union, Any

class NeuronLayer:
    """Defines a layer in a NeuronNetwork: a list of neurons that all receive the same inputs."""
    # The Neuron annotation is quoted so this definition does not need Neuron to exist yet.
    def __init__(self, neurons: List["Neuron"], ID: Any = 0):
        """Stores the neurons of this layer.

        neurons: the neurons of the layer, in output order.
        ID:      free-form identifier for debugging (kept on self.ID).
        """
        self.neurons = neurons
        self.outputs = []  # Outputs of the latest activate() call, one per neuron.
        self.ID = ID  # Previously accepted but silently dropped.

    def activate(self, inputlist: List[Union[int, float]]):
        """Runs the inputlist through every neuron of the layer and saves the results in self.outputs.

        self.outputs is reset first, so it only ever holds the outputs of the latest call.
        """
        self.outputs = []
        for neuron in self.neurons:
            self.outputs.append(neuron.activate(inputlist))

In [9]:
# The NeuronNetwork houses **all** the layers of the network.

from typing import List, Union, Any  # Onschuldige library die alleen beter laat zien wat voor soorten inputs er verwacht worden.

class NeuronNetwork:
    """Defines the neuron network; wraps all the given layers into this network.

    The layers are run in list order by feed_forward(); the last layer is treated as the
    output layer by backpropagation().
    """
    # The NeuronLayer annotation is quoted so this definition does not need NeuronLayer to exist yet.
    def __init__(self, layers: List["NeuronLayer"], learningrate: Union[int,float] = 0.3, ID: Any = 0,):
        """Initialises a neuron network.

        layers:       the layers in feed-forward order (stored on self.hiddenlayers; despite the
                      name this list includes the output layer).
        learningrate: learning rate handed to every neuron during backpropagation.
        ID:           free-form identifier, only used in messages and __str__.
        """
        self.hiddenlayers = layers
        self.learningrate = learningrate
        self.input = []  # Inputs of the latest feed_forward()
        self.output = []  # Outputs of the latest feed_forward()

        self.ID = ID
        self.hasrun = False  # Whether the latest feed_forward() completed.

    def feed_forward(self, inputs: List[Union[int,float]]) -> List[Union[int,float]]:
        """Feeds the inputs through all the layers in order and returns the output of the final layer.

        Also records the inputs and the final outputs on self.input / self.output.
        """
        self.hasrun = False
        signal = inputs.copy()  # Keep both lists unlinked; the original list is saved for debugging.
        for layer in self.hiddenlayers:
            layer.activate(signal)
            signal = layer.outputs.copy()  # Same deal here

        self.input = inputs
        self.output = signal
        self.hasrun = True

        return signal

    def backpropagation(self, actualoutput):
        """Computes the error of every neuron for the latest feed_forward() and stages new weights.

        actualoutput: the target values, one per neuron in the output layer.
        Nothing is applied yet; call update() afterwards.
        Raises an Exception when the number of targets differs from the number of output neurons.
        """
        outputlayer = self.hiddenlayers[-1]

        if len(outputlayer.neurons) != len(actualoutput):
            raise Exception("Not enough outputs for each neuron in the output layer @ NeuronNetwork {}".format(self.ID))

        for i in range(len(outputlayer.neurons)):
            outputlayer.neurons[i].erroroutput(actualoutput[i],self.learningrate)
        # Now the tricky part: walk the remaining layers from back to front.
        # The number of connections equals the number of neurons in the previous layer.
        # Current layer (lindx)  : neurons to call .errorhidden() on; i is the neuron's index.
        # Next layer (lindx + 1) : neurons to read weights from; weight i belongs to neuron i.
        for lindx in range(len(self.hiddenlayers)-2,-1,-1):
            currentlayer = self.hiddenlayers[lindx]
            nextlayer = self.hiddenlayers[lindx+1]
            for i in range(len(currentlayer.neurons)):
                weights = []
                errors = []
                for neuron in nextlayer.neurons:
                    weights.append(neuron.getweights()[i])  # Weight the next-layer neuron applies to this neuron's output
                    errors.append(neuron.error)  # Error of that same next-layer neuron
                currentlayer.neurons[i].errorhidden(weights,errors,self.learningrate)

    def update(self):
        """Applies the staged weights and biases of every neuron in the network.

        Expects backpropagation() to have been called first, so that every neuron
        has new weights and a new bias staged.
        """
        for layer in self.hiddenlayers:
            for neuron in layer.neurons:
                neuron.update()

    def train(self, inputs: List[List[int]], actualoutputs: List[List[int]], epochs: int = 40, errortreshold: float = 0.1, evalinterval: int = 1) -> None:
        """Trains the network sample by sample (feed forward, backpropagate, update).

        inputs:        the training samples.
        actualoutputs: the target outputs, one list per sample, in the same order.
        epochs:        maximum number of passes over the training set.
        errortreshold: training stops as soon as the error over the whole set drops below this.
        evalinterval:  the error over the whole set is re-computed after every `evalinterval`
                       samples and after the last sample of each epoch. The default of 1
                       evaluates after every sample, which costs a full pass over the data
                       per sample; pass len(inputs) to evaluate once per epoch instead.
        Raises an Exception when evalinterval is smaller than 1.
        """
        if evalinterval < 1:
            raise Exception("evalinterval must be at least 1 @ NeuronNetwork {}".format(self.ID))
        error = errortreshold+1  # Guarantees the first epoch runs.
        lastindex = len(inputs) - 1
        while epochs > 0 and error >= errortreshold:
            for i in range(len(inputs)):
                self.feed_forward(inputs[i])
                self.backpropagation(actualoutputs[i])
                self.update()
                if (i + 1) % evalinterval == 0 or i == lastindex:
                    error = self.error(inputs,actualoutputs)  # MSE
                    if error < errortreshold:
                        break
            epochs -= 1

    def error(self, inputs: List[List[Union[int,float]]], actualoutputs: List[List[Union[int,float]]]) -> float:
        """Calculates the MSE of this network's output layer over a training set.

        Returns the sum of all squared output errors divided by the number of samples.
        The bookkeeping of the network, its layers and its neurons is restored afterwards,
        so calling this leaves no trace of the evaluation runs.
        Raises ZeroDivisionError when inputs is empty.
        """
        # Snapshot everything feed_forward() overwrites; error must behave as a function
        # without side effects.
        savedinput, savedoutput, savedhasrun = self.input, self.output, self.hasrun
        savedlayeroutputs = [layer.outputs for layer in self.hiddenlayers]
        savedneuronruns = [[neuron.hasrun for neuron in layer.neurons] for layer in self.hiddenlayers]

        outputs = []
        try:
            for i in range(len(inputs)):
                outputs.append(self.feed_forward(inputs[i]))
                # Drop the history entries this evaluation run just added to every neuron.
                for layer in self.hiddenlayers:
                    for neuron in layer.neurons:
                        del neuron.output[-1]
                        del neuron.input[-1]
        finally:
            self.input, self.output, self.hasrun = savedinput, savedoutput, savedhasrun
            for layer, layeroutputs, neuronruns in zip(self.hiddenlayers, savedlayeroutputs, savedneuronruns):
                layer.outputs = layeroutputs
                for neuron, ran in zip(layer.neurons, neuronruns):
                    neuron.hasrun = ran

        squarederrors = [
            (actualoutputs[i1][i2] - outputs[i1][i2])**2
            for i1 in range(len(outputs))
            for i2 in range(len(outputs[i1]))
        ]

        return sum(squarederrors) / len(outputs)

    def __str__(self):
        """Tries to print out the network in a readable manner.
        Additional information is available once the network has been run once."""
        output = ""
        output += "NEURONNETWORK ID: {}\n".format(self.ID)
        if self.hasrun:
            output += "INPUT: {}\nV\n".format(self.input)
        for layer in self.hiddenlayers:
            for neuron in layer.neurons:
                output += "[{} + {}]\n".format([round(x,4) for x in neuron.getweights()],round(neuron.getbias(),4))
            if self.hasrun:
                output += "OUTPUT: {}\n".format(layer.outputs)
            output += "V\n"
        if self.hasrun:
            output += "FINAL OUTPUT: {}\n".format(self.output)
        else:
            output += "ACTIVATION PENDING\n"
        return output


# Stap 1 - Data collection
Importeer de dataset en kijk naar de eerste gegevens

In [12]:
# Load the scikit-learn digits dataset; as_frame=True requests pandas objects instead of arrays.
data = load_digits(as_frame=True)
# "frame" is the combined table: per the rendered output, the pixel_* columns plus "target".
frame = data["frame"]
# Bare last expression so the notebook renders the table.
frame

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0,9
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0,0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0,8
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0,9


In [14]:
# Draw 10 rows to eyeball a few entries.
# NOTE(review): no random_state is passed, so the rows presumably differ on every run —
# pin a seed if the displayed sample has to be reproducible.
sample = frame.sample(10)
sample

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
1643,0.0,0.0,7.0,12.0,15.0,6.0,0.0,0.0,0.0,14.0,...,0.0,0.0,0.0,7.0,16.0,7.0,0.0,0.0,0.0,5
294,0.0,0.0,4.0,15.0,14.0,10.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,7.0,14.0,16.0,16.0,1.0,0.0,8
1132,0.0,0.0,12.0,16.0,15.0,6.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,13.0,16.0,16.0,15.0,1.0,0.0,9
1247,0.0,0.0,4.0,12.0,13.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,16.0,13.0,0.0,0.0,0.0,1
416,0.0,0.0,4.0,14.0,9.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,4.0,13.0,9.0,0.0,0.0,0.0,0
1356,0.0,0.0,0.0,12.0,14.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,10.0,14.0,15.0,2.0,0.0,9
1487,0.0,0.0,7.0,16.0,15.0,1.0,0.0,0.0,0.0,5.0,...,0.0,0.0,0.0,8.0,16.0,13.0,5.0,0.0,0.0,0
1319,0.0,2.0,16.0,16.0,16.0,16.0,3.0,0.0,0.0,1.0,...,0.0,0.0,3.0,16.0,16.0,16.0,5.0,0.0,0.0,5
853,0.0,4.0,16.0,15.0,1.0,0.0,0.0,0.0,0.0,6.0,...,0.0,0.0,3.0,16.0,16.0,16.0,16.0,8.0,0.0,2
474,0.0,0.0,0.0,9.0,14.0,2.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,9.0,15.0,15.0,10.0,0.0,6


De data gaat hier over de intensiteit van een kleur van een pixel (8x8: 0 tot 7 voor rijen en kolommen)