In [1]:
import numpy as np
import math
import csv
from sklearn.preprocessing import OneHotEncoder

# Neural Network Class

In [2]:
class neural_network:
    """Small fully connected feed-forward network trained by full-batch gradient descent.

    Data is laid out one sample per row. Every layer is stored as a dict with
    a ``"weights"`` matrix of shape ``(n_inputs + 1, n_neurons)`` (the extra,
    last row holds the bias weights) and an ``"activation"`` name.

    Typical use: create the network, call ``add_layer`` once per hidden layer,
    call ``compile`` exactly once to append the output layer, then ``train``
    and ``evaluate``.
    """

    def __init__(self, n_input, n_output):
        """Create an empty network.

        Args:
            n_input: number of features per input sample.
            n_output: number of neurons in the output layer added by ``compile``.

        Note: this seeds NumPy's *global* random generator with 1 so that the
        weight initialisation in ``add_layer`` is reproducible.
        """
        self.input_size = n_input
        self.output_size = n_output
        self.layers = []
        # width of the most recently added layer (input width until a layer is added)
        self.current_last_n = self.input_size
        # activations of every layer from the most recent forward pass
        self.layer_outputs = []
        np.random.seed(1)

    def add_layer(self, n_neurons, activation="sigmoid"):
        """Append a layer of ``n_neurons`` neurons fed by the previous layer.

        Args:
            n_neurons: number of neurons in the new layer.
            activation: ``"sigmoid"``, ``"relu"`` or ``"softmax"``.
        """
        # One more row than the previous layer has neurons: the last row is the bias.
        weights = np.random.uniform(
            low=-0.3, high=0.3, size=(self.current_last_n + 1, n_neurons)
        )
        self.current_last_n = n_neurons

        self.layers.append({"weights": weights, "activation": activation})

    def compile(self):
        """Append the sigmoid output layer; call once after all hidden layers."""
        self.add_layer(self.output_size, activation="sigmoid")

    def predict(self, input):
        """Run a forward pass and return the output layer's activations.

        Args:
            input: 2-D array-like, one sample per row.

        Returns:
            Array with one row per sample and one column per output neuron.

        Side effect: ``self.layer_outputs`` is replaced by the activations of
        every layer from this pass (used by ``train`` for back-propagation).
        """
        input = np.array(input, dtype=float)
        r, c = input.shape
        # add column of ones because of bias neuron
        input = np.c_[input, np.ones(r)]

        # Start from a clean slate so that repeated predict()/evaluate() calls
        # cannot leave stale activations behind for a later train() call.
        self.layer_outputs = []

        last_index = len(self.layers) - 1
        for index, layer in enumerate(self.layers):
            output = np.dot(input, layer["weights"])
            output = self.activation(output, layer["activation"])

            # save outputs of all layers for back propagation
            self.layer_outputs.append(output)

            # Every layer except the last feeds another layer, which expects a
            # bias column. Compare by position: comparing the layer dicts
            # themselves would compare NumPy arrays and can raise.
            if index != last_index:
                output = np.c_[output, np.ones(output.shape[0])]

            input = output

        return output

    def train(self, input, labels, epochs, learning_rate):
        """Train with full-batch gradient descent on the summed squared error.

        Args:
            input: 2-D array-like of training samples, one per row.
            labels: 2-D array-like of targets, same number of rows as ``input``.
            epochs: index of the last epoch; ``epochs + 1`` passes are run
                (0 .. epochs inclusive), so the final epoch is also reported
                when it is a multiple of 200.
            learning_rate: step size applied to every weight update.

        Prints the summed squared error every 200 epochs.
        """
        input = np.array(input, dtype=float)
        labels = np.array(labels, dtype=float)

        for epoch in range(epochs + 1):
            output = self.predict(input)
            delta = []

            # Output layer: error is the difference to the labels.
            errors = np.array(self.layer_outputs[-1] - labels)
            derivatives = self.derivative(
                self.layer_outputs[-1], self.layers[-1]["activation"]
            )
            delta.append(errors * derivatives)

            # Hidden layers, back to front: propagate the previous delta
            # through the following layer's weights (bias row excluded) and
            # scale by the derivative of the layer that produced the output.
            for i in range(1, len(self.layer_outputs)):
                errors = np.dot(delta[-1], self.layers[-i]["weights"].T[:, :-1])
                derivatives = self.derivative(
                    self.layer_outputs[-i - 1], self.layers[-i - 1]["activation"]
                )
                delta.append(errors * derivatives)

            self.update_weights(input, delta, learning_rate)

            if epoch % 200 == 0:
                sum_error = np.sum((labels - output) ** 2)
                print(f"epoch: {epoch}, error: {sum_error}")

    def update_weights(self, input, delta, learning_rate):
        """Apply one gradient-descent step to every layer.

        Args:
            input: 2-D array of the samples used for the forward pass.
            delta: list of per-layer deltas ordered output layer first; it is
                consumed (emptied) by this call.
            learning_rate: step size.

        Side effect: clears ``self.layer_outputs`` when done.
        """
        # add column of ones to input for bias neurons
        r, c = input.shape
        input = np.c_[input, np.ones(r)]

        for index, layer in enumerate(self.layers):
            # deltas were appended back to front, so pop() yields them front to back
            delta_values = delta.pop()
            layer["weights"] -= learning_rate * np.dot(input.T, delta_values)

            # this layer's activations (plus bias column) feed the next layer
            activations = self.layer_outputs[index]
            input = np.c_[activations, np.ones(activations.shape[0])]

        self.layer_outputs = []

    def evaluate(self, inputs, labels):
        """Print how many samples are classified correctly.

        A sample counts as correct when the one-hot vector marking its largest
        output(s) equals the label row exactly.
        """
        outputs = self.predict(inputs)
        outputs = (outputs == outputs.max(axis=1)[:, None])
        correct = np.sum((labels == outputs).all(1))
        total = len(inputs)

        print("\nCorrectly classified: ", correct)
        print("Total eval data: ", total)
        print("Accuracy: ", correct/total)

    def activation(self, inputs, activation):
        """Apply the named activation function element-wise / row-wise to ``inputs``.

        Raises:
            ValueError: if ``activation`` is not a supported name.
        """
        # Compare by value: identity (`is`) only works for interned strings.
        if activation == "sigmoid":
            return self.sigmoid(inputs)
        if activation == "relu":
            return self.relu(inputs)
        if activation == "softmax":
            return self.softmax(inputs)
        raise ValueError(f"unknown activation: {activation!r}")

    def derivative(self, inputs, activation):
        """Return the derivative term for the named activation.

        ``inputs`` are the layer's *outputs* (already activated), which is what
        ``train`` passes in.

        Raises:
            ValueError: if ``activation`` is not a supported name.
        """
        if activation == "sigmoid":
            return self.sigmoid_derivative(inputs)
        if activation == "relu":
            return self.relu_derivative(inputs)
        if activation == "softmax":
            return self.softmax_derivative(inputs)
        raise ValueError(f"unknown activation: {activation!r}")

    def error(self, actual, expected):
        """Placeholder; not implemented and currently returns ``None``."""
        return

    def sigmoid(self, x):
        """Logistic function of ``x`` (scalar or array), with ``x`` clamped to [-10, 10]."""
        x = np.clip(x, -10, 10)
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Sigmoid derivative expressed in terms of the sigmoid *output* ``x``."""
        return x * (1.0 - x)

    def relu(self, x):
        """Rectified linear unit: element-wise ``max(0, x)``."""
        return np.maximum(0.0, x)

    def relu_derivative(self, x):
        """ReLU derivative: 1 where ``x`` is positive, 0 elsewhere."""
        return (x > 0) * 1

    def softmax(self, x):
        """Softmax over the last axis, i.e. per sample for row-major batches.

        The row maximum is subtracted before exponentiating; this leaves the
        result unchanged mathematically and avoids overflow for large inputs.
        """
        x = np.asarray(x, dtype=float)
        shifted = x - np.max(x, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=-1, keepdims=True)

    def softmax_derivative(self, x):
        """Derivative term used for softmax layers (expects a 2-D ``x``).

        NOTE(review): the formula is kept as originally written. It applies
        softmax to ``x`` again although ``train`` passes in layer outputs, and
        it uses a rectangular identity of shape (columns, rows), so it does not
        look like a softmax Jacobian. Verify before relying on softmax layers
        for training.
        """
        I = np.eye(x.shape[1], x.shape[0])

        return self.softmax(x) * (I - self.softmax(x).T).T

In [3]:
inputs = []
labels = []

# Importing MNIST dataset
# newline='' hands line-ending handling to the csv module, as its
# documentation recommends for files passed to csv.reader.
with open('mnist.csv', newline='') as csvfile:
    data = csv.reader(csvfile)
    # discard the first row (presumably a header line -- confirm against the file)
    next(data, None)
    for row in data:
        # csv.reader yields an empty list for blank lines (e.g. a trailing
        # newline); skip them instead of failing on row.pop(0)
        if not row:
            continue
        # first column is kept as the (string) label, the rest become ints
        labels.append(row.pop(0))
        inputs.append(list(map(int, row)))
    
    print(inputs[0])
    print("Label:", labels[0])

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 222, 254, 254, 254, 254, 241, 198, 198, 198, 198, 198, 198, 198, 198, 170, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 114, 72, 114, 163, 227, 254, 225, 254, 254, 254, 250, 229, 254, 254, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 66, 14, 67, 67, 67, 59, 21, 236, 254, 106, 0, 0, 0, 

In [4]:
# Encoding Labels
input_size = len(inputs[0])
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and later removed the
# old name; try the new keyword first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
# labels is a flat list, the encoder expects a single-column 2-D array
onehot_encoded_labels = encoder.fit_transform(np.array(labels).reshape(-1,1))
onehot_encoded_labels = np.array(onehot_encoded_labels, dtype=float)

# Scaling Input Data
# NOTE(review): if the pixel values are 8-bit (0..255) this maps them to
# [0, 1) rather than [0, 1] -- confirm 256 is the intended divisor.
# Re-running this cell scales `inputs` again; re-run the loading cell first.
inputs = np.array(inputs, dtype=float)
inputs = inputs / 256 

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [5]:
# Building Neural Network
# Two sigmoid hidden layers (8 then 4 neurons) in front of the 10-neuron
# output layer that compile() appends.

network = neural_network(input_size, 10)
for hidden_units in (8, 4):
    network.add_layer(hidden_units, activation="sigmoid")
network.compile()

In [6]:
# Training
# Fit on the first 4000 samples; the seed is reset right before training.

np.random.seed(1)
network.train(
    inputs[:4000],
    onehot_encoded_labels[:4000],
    epochs=4000,
    learning_rate=0.01,
)



epoch: 0, error: 9894.570631834442
epoch: 200, error: 3598.6364652866323
epoch: 400, error: 3598.5688168361858
epoch: 600, error: 3598.5147529965966
epoch: 800, error: 3598.3339947913837
epoch: 1000, error: 3648.951881344056
epoch: 1200, error: 3595.0742700451856
epoch: 1400, error: 3132.9005533752133
epoch: 1600, error: 2257.2537982981607
epoch: 1800, error: 1667.4983648933248
epoch: 2000, error: 1482.7011602746675
epoch: 2200, error: 1169.2182529325933
epoch: 2400, error: 1263.6317202546427
epoch: 2600, error: 979.8990791686776
epoch: 2800, error: 914.1105381596312
epoch: 3000, error: 745.1766351183812
epoch: 3200, error: 754.1551043267319
epoch: 3400, error: 929.3961800990985
epoch: 3600, error: 697.7994774457461
epoch: 3800, error: 755.3743495307857
epoch: 4000, error: 361.11963596215946


In [7]:
# Evaluation
# Score on the samples from index 8000 onward, which training did not use.

network.evaluate(
    inputs=inputs[8000:],
    labels=onehot_encoded_labels[8000:],
)


Correctly classified:  1625
Total eval data:  2000
Accuracy:  0.8125


