In [1]:
import numpy as np
import pandas as pd

In [2]:
def oneHot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [3]:
def getData():
    '''
    MNIST data set 
    x has 784 feature
    y is op value from 0 to 9 
    '''
    data = np.array(pd.read_csv('data/MNIST/MNIST_train.csv'))
    x = (data[:,1:]/255).T
    y = oneHot(data[:,0])
    return x,y
    

In [4]:
class Activation:
    @staticmethod
    def ReLU(Z, isDerivation=False):
        if isDerivation:
            return Z > 0
        return np.maximum(Z, 0)

    @staticmethod
    def sigmoid(Z, isDerivation=False):
        if isDerivation:
            op = Activation.sigmoid(Z)
            return op * (1- op)
        return 1/(1 + np.exp(-Z))

    @staticmethod
    def softmax(Z,isDerivation=False):
        if isDerivation:
            pass
        A = np.exp(Z) / sum(np.exp(Z))
        return A

In [5]:
class Accuracy:
    @staticmethod
    def multiClass(target, prediction):
        return np.argmax(target, axis=0) == np.argmax(prediction, axis=0)

In [6]:
class Error:
    def meanSquareError(self, isDerivation=False):
        pass
    
    @staticmethod
    def crossEntropyLoss(target, predicted,  isDerivation=False):
        if isDerivation:
            return predicted - target
        loss = -target * np.log(predicted)
        accuracy = Accuracy.multiClass(target,predicted)
        return {'loss': loss, 'accuracy': accuracy}
    
    @staticmethod
    def hiddenError(target, predicted):
        return target.T.dot(predicted)

In [7]:
class DenseLayer:
    bias = []
    weight = []
    output = []
    isInput=False
    noOfNodes = 0
    activation = None
    def __init__(self, inputSize=0, outputSize=0, activation=None,isInput=False,input=[]):
        '''
        inputSize -> no.of.input feature 
        outputSize -> no.of.output
        '''
        if isInput:
            self.output = input
            self.isInput = True
            self.noOfNodes = input.shape[0]
        else :
            self.noOfNodes = inputSize
            self.activation = activation
            self.weight = self.generateWeight(inputSize, outputSize)
            self.bias = self.generateWeight(inputSize,1)

    def generateWeight(self,*r):
        '''
        Receive input as set that define the set shape
        '''
        return np.random.randn(*r) - 0.5

In [8]:
class NeuralNetwork:
    input = []
    target = []
    layers = []
    history = {'loss': [], 'accuracy':[]}
    loss = None

    def __init__(self, input, target, loss):
        self.loss = loss
        self.input = input
        self.target = target
        self.layers.append(DenseLayer(None,None,None,isInput=True, input=self.input))

    def append(self, node, activationFunction):
        preNode = self.layers[len(self.layers) -1]
        self.layers.append(DenseLayer(inputSize=node,outputSize=preNode.noOfNodes, activation=activationFunction))

    def train(self, epoch=100):
        for i in range(epoch):
            for j in range(len(self.layers)):
                if not self.layers[j].isInput:
                    self.layers[j].output = self.forWord(self.layers[j], self.layers[j-1].output)
                if j == len(self.layers)-1 :
                    loss = self.loss(self.target,self.layers[j].output)
                    self.history['loss'].append(np.mean(loss['loss']))
                    self.history['accuracy'].append(np.mean(loss['accuracy']))
                    index = len(self.history['loss']) -1
                    print(f"Epoch: {i+1} Loss: {self.history['loss'][index]} Accuracy: {self.history['accuracy'][index]}")
                    # calculate loss
                    self.backPropogation(loss['loss'])



    def forWord(self, layer, input):
        return layer.activation(np.dot(layer.weight, input) + layer.bias)


    def backPropogation(self, loss):
        layer_length = len(self.layers)
        for index,layer in enumerate(self.layers[::-1]):
            if not layer.isInput:
                pervious_node = self.layers[layer_length - index - 1]
                if index != 0:
                    # previous layer error
                    loss = Error.hiddenError(pervious_node.weight,loss)
                layer.weight,layer.bias = self.gradient(layer.weight, layer.bias, pervious_node.output, loss)


    def gradient(self, w, b, x, err, lr=0.01):
        w = w - (1/len(x[0]) * (err.dot(x.T))) * lr
        b = b - (lr * np.mean(err, axis=1).reshape(b.shape))
        return w,b        

In [9]:
x,y = getData()

In [10]:
model = None
model = NeuralNetwork(x,y,Error.crossEntropyLoss)

model.append(10, Activation.ReLU)
model.append(10,Activation.softmax)

In [11]:
model.train(10)

Epoch: 1 Loss: 0.2833525205223664 Accuracy: 0.09971428571428571


ValueError: operands could not be broadcast together with shapes (10,784) (784,10) 

In [None]:
test = np.copy(model.layers[2].output)
test.shape

(10, 42000)

In [None]:
-y[:,0]*np.log(test[:,0])

array([0.      , 2.242442, 0.      , 0.      , 0.      , 0.      ,
       0.      , 0.      , 0.      , 0.      ])

In [None]:
test[:,0]
np.log(0.5)

-0.6931471805599453

In [None]:
np.mean(np.argmax(y,axis=0) == np.argmax(test, axis=0))

0.09945238095238096

In [None]:
np.mean(-y * np.log(test))

0.2415347100265878

In [16]:
model.layers

[<__main__.DenseLayer at 0x7f6582f1fee0>,
 <__main__.DenseLayer at 0x7f6582f1fa30>,
 <__main__.DenseLayer at 0x7f6582f1fc10>,
 <__main__.DenseLayer at 0x7f6582e124a0>,
 <__main__.DenseLayer at 0x7f6582aab9d0>,
 <__main__.DenseLayer at 0x7f65b4926770>,
 <__main__.DenseLayer at 0x7f6582d3e410>,
 <__main__.DenseLayer at 0x7f6582d3e0e0>,
 <__main__.DenseLayer at 0x7f6582ae92d0>]