In [3]:
import numpy as np
import pandas as pd

In [4]:
def oneHot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [5]:
def getData():
    '''
    MNIST data set 
    x has 784 feature
    y is op value from 0 to 9 
    '''
    data = np.array(pd.read_csv('data/MNIST/MNIST_train.csv'))
    x = (data[:,1:]/255.).T
    y = oneHot(data[:,0])
    return x,y
    

In [6]:
class Activation:
    @staticmethod
    def ReLU(Z, isDerivation=False):
        if isDerivation:
            return Z > 0
        return np.maximum(Z, 0)

    @staticmethod
    def sigmoid(Z, isDerivation=False):
        if isDerivation:
            op = Activation.sigmoid(Z)
            return op * (1- op)
        return 1/(1 + np.exp(-Z))

    @staticmethod
    def softmax(Z,isDerivation=False):
        if isDerivation:
            return 1
        Z = Z - np.max(Z, axis=0)
        A = np.exp(Z) / sum(np.exp(Z))
        return A

In [7]:
class Accuracy:
    @staticmethod
    def multiClass(target, prediction):
        return np.argmax(target, axis=0) == np.argmax(prediction, axis=0)

In [8]:
class Error:
    @staticmethod
    def meanSquareError(target, predicted, isDerivation=False):
        if isDerivation:
            return 2 * (predicted - target) / np.size(target)
        loss = np.power(target - predicted, 2)
        accuracy = Accuracy.multiClass(target, predicted)
        return {'loss': loss, 'accuracy': accuracy}
    
    @staticmethod
    def crossEntropyLoss(target, predicted,  isDerivation=False):
        if isDerivation:
            return predicted - target
        loss = -target * np.log(predicted + 10 ** -100)
        accuracy = Accuracy.multiClass(target,predicted)
        return {'loss': loss, 'accuracy': accuracy}
    
    @staticmethod
    def hiddenError(target, predicted):
        return target.T.dot(predicted)

In [7]:
class DenseLayer:
    op = []
    bias = []
    weight = []
    output = []
    isInput=False
    noOfNodes = 0
    activation = None
    def __init__(self, inputSize=0, outputSize=0, activation=None,isInput=False,input=[]):
        '''
        inputSize -> no.of.input feature 
        outputSize -> no.of.output
        '''
        if isInput:
            self.output = input
            self.isInput = True
            self.noOfNodes = input.shape[0]
        else :
            self.noOfNodes = inputSize
            self.activation = activation
            self.weight = np.random.rand(inputSize, outputSize) - 0.5
            self.bias = np.random.rand(inputSize,1) - 0.5

    def generateWeight(self,*r):
        '''
        Receive input as set that define the set shape
        '''
        return np.random.randn(*r) - 0.5

In [12]:
class NeuralNetwork:
    input = []
    target = []
    layers = []
    history = {'loss': [], 'accuracy':[]}
    loss = None

    @staticmethod
    def forWord(w, b, x):
        return np.dot(w, x) + b

    @staticmethod
    def gradient(w, b, x, err, lr=0.01):
        w = w - (1/len(x[0]) * (err.dot(x.T))) * lr
        b = b - (lr * np.mean(err, axis=1).reshape(b.shape))
        return w,b  

    def __init__(self, input, target, loss):
        self.loss = loss
        self.input = input
        self.target = target
        self.layers.append(
            DenseLayer(None,None,None,isInput=True, input=self.input)
        )

    def append(self, node, activationFunction):
        preNode = self.layers[len(self.layers) -1]
        self.layers.append(
            DenseLayer(inputSize=node,outputSize=preNode.noOfNodes, activation=activationFunction)
        )

    def train(self, epoch=100, lr=0.01):
        for i in range(epoch):

            for j in range(len(self.layers)):

                if not self.layers[j].isInput:
                    self.layers[j].op = NeuralNetwork.forWord(self.layers[j].weight, self.layers[j].bias, self.layers[j-1].output)
                    self.layers[j].output = self.layers[j].activation(self.layers[j].op)

                if j == len(self.layers)-1 :
                    loss = self.loss(self.target,self.layers[j].output)
                    self.history['loss'].append(np.mean(loss['loss']))
                    self.history['accuracy'].append(np.mean(loss['accuracy']))
                    print(f"Epoch: {i+1} Loss: {self.history['loss'][-1]} Accuracy: {self.history['accuracy'][-1]}")
                    # calculate loss
                    self.backPropogation(loss['loss'],lr)


    def backPropogation(self, loss,lr):
        layer_length = len(self.layers)
        for index, layer in enumerate(self.layers[::-1]):
            if not layer.isInput:
                pervious_node = self.layers[layer_length - index - 2]

                layer.weight, layer.bias = NeuralNetwork.gradient(
                    w=layer.weight, b=layer.bias, x=pervious_node.output, err=loss,lr=lr)
                    
                oldw = np.copy(layer.weight)

                loss = Error.hiddenError(oldw, loss)
      

In [9]:
x,y = getData()

In [10]:
model = NeuralNetwork(np.copy(x),np.copy(y),Error.crossEntropyLoss)

model.append(10, Activation.ReLU)
model.append(10,Activation.softmax)
model.train(10)
del model

Layer :  1 9.695222881785638 0.0
Layer :  2 0.8758029390824879 4.5009360749428186e-06
Epoch: 1 Loss: 0.2840997948748314 Accuracy: 0.08061904761904762
Current Layer Weight :  (10, 10)  Pre layer Weight :  (10, 784)
Current Layer Weight :  (10, 784)  Pre layer Weight :  0
Layer :  1 9.691253650337572 0.0
Layer :  2 0.8783575991244833 4.327687858149742e-06
Epoch: 2 Loss: 0.285423818488201 Accuracy: 0.08028571428571428
Current Layer Weight :  (10, 10)  Pre layer Weight :  (10, 784)
Current Layer Weight :  (10, 784)  Pre layer Weight :  0
Layer :  1 9.696799199368703 0.0
Layer :  2 0.8872659220361144 3.9217769070783815e-06
Epoch: 3 Loss: 0.286973191037366 Accuracy: 0.07980952380952382
Current Layer Weight :  (10, 10)  Pre layer Weight :  (10, 784)
Current Layer Weight :  (10, 784)  Pre layer Weight :  0
Layer :  1 9.710681650097998 0.0
Layer :  2 0.8958455142728211 3.492410184188924e-06
Epoch: 4 Loss: 0.2887759890707759 Accuracy: 0.08021428571428571
Current Layer Weight :  (10, 10)  Pre lay

In [11]:
model1 = NeuralNetwork(np.copy(x),np.copy(y),Error.meanSquareError)

model1.append(10, Activation.ReLU)
model1.append(10,Activation.softmax)
model1.train(10, 0.1)
del model1

Epoch: 1 Loss: 0.09957427570808244 Accuracy: 0.10680952380952381
Current Layer Weight :  (10, 10)  Pre layer Weight :  (10, 784)
Current Layer Weight :  (10, 784)  Pre layer Weight :  0
Current Layer Weight :  (10, 10)  Pre layer Weight :  (10, 784)


ValueError: operands could not be broadcast together with shapes (10,10) (784,10) 

# Debug

In [14]:
def debug(epoch, loss, lr=0.1):
    hw = np.random.rand(10, 784) - 0.5
    hb = np.random.rand(10, 1) - 0.5
    ow = np.random.rand(10, 10) - 0.5
    ob = np.random.rand(10, 1) - 0.5
    
    for i in range(epoch):
        #forword
        #Hidden
        hid_op = NeuralNetwork.forWord(hw,hb,x)
        hid_act = Activation.ReLU(hid_op)

        #op layer
        op = NeuralNetwork.forWord(ow,ob,hid_act)
        y_pred = Activation.softmax(op)

        # backword
        # output error
        op_err = loss(y, y_pred, True) * Activation.softmax(op,True)
        ow,ob = NeuralNetwork.gradient(w=ow, b=ob, x=hid_act,err=op_err,lr=lr)

        # #hidden error
        hid_err = Error.hiddenError(ow,op_err) * Activation.ReLU(hid_op,True)
        hw,hb = NeuralNetwork.gradient(w=hw, b=hb, x=x,err=hid_err, lr=lr)

        err = loss(y, y_pred)
        print(f"Epoch : {i + 1}, Loss : {np.mean(err['loss'])}, Accuracy : {np.mean(err['accuracy'])}")

In [16]:
debug(100,Error.crossEntropyLoss,0.1)

Epoch : 1, Loss : 0.34088263436142524, Accuracy : 0.12157142857142857
Epoch : 2, Loss : 0.28871713950843847, Accuracy : 0.14923809523809523
Epoch : 3, Loss : 0.26792198899743125, Accuracy : 0.15895238095238096
Epoch : 4, Loss : 0.2551714713739749, Accuracy : 0.16523809523809524
Epoch : 5, Loss : 0.24659889125042836, Accuracy : 0.17254761904761906
Epoch : 6, Loss : 0.24043032279098517, Accuracy : 0.18183333333333335
Epoch : 7, Loss : 0.23556972321796343, Accuracy : 0.1925952380952381
Epoch : 8, Loss : 0.23140001645167174, Accuracy : 0.20711904761904762
Epoch : 9, Loss : 0.2276349588699246, Accuracy : 0.21942857142857142
Epoch : 10, Loss : 0.22415270565376905, Accuracy : 0.2318095238095238
Epoch : 11, Loss : 0.22088336052424143, Accuracy : 0.24457142857142858
Epoch : 12, Loss : 0.21779004106003602, Accuracy : 0.2566428571428571
Epoch : 13, Loss : 0.2148453765714435, Accuracy : 0.2670952380952381
Epoch : 14, Loss : 0.21202673923744203, Accuracy : 0.27735714285714286
Epoch : 15, Loss : 0.2

In [None]:
debug(100,Error.meanSquareError,0.1)

Epoch : 1, Loss : 0.11869653794026085, Accuracy : 0.0884047619047619
Epoch : 2, Loss : 0.11869644640788884, Accuracy : 0.0884047619047619
Epoch : 3, Loss : 0.11869635487570838, Accuracy : 0.0884047619047619
Epoch : 4, Loss : 0.11869626334406605, Accuracy : 0.0884047619047619
Epoch : 5, Loss : 0.11869617181296181, Accuracy : 0.0884047619047619
Epoch : 6, Loss : 0.1186960802823957, Accuracy : 0.0884047619047619
Epoch : 7, Loss : 0.11869598875236766, Accuracy : 0.0884047619047619
Epoch : 8, Loss : 0.11869589722287772, Accuracy : 0.0884047619047619
Epoch : 9, Loss : 0.11869580569392589, Accuracy : 0.0884047619047619
Epoch : 10, Loss : 0.11869571416551218, Accuracy : 0.0884047619047619
Epoch : 11, Loss : 0.11869562263773854, Accuracy : 0.0884047619047619
Epoch : 12, Loss : 0.11869553111050966, Accuracy : 0.0884047619047619
Epoch : 13, Loss : 0.1186954395838189, Accuracy : 0.0884047619047619
Epoch : 14, Loss : 0.11869534805766617, Accuracy : 0.0884047619047619
Epoch : 15, Loss : 0.1186952565