In [45]:
import numpy as np
import pandas as pd

In [46]:
def oneHot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

In [47]:
def getData():
    '''
    MNIST data set 
    x has 784 feature
    y is op value from 0 to 9 
    '''
    data = np.array(pd.read_csv('data/MNIST/MNIST_train.csv'))
    x = (data[:,1:]/255).T
    y = oneHot(data[:,0])
    return x,y
    

In [48]:
class Activation:
    @staticmethod
    def ReLU(Z, isDerivation=False):
        if isDerivation:
            return Z > 0
        return np.maximum(Z, 0)

    @staticmethod
    def sigmoid(Z, isDerivation=False):
        if isDerivation:
            op = Activation.sigmoid(Z)
            return op * (1- op)
        return 1/(1 + np.exp(-Z))

    @staticmethod
    def softmax(Z,isDerivation=False):
        if isDerivation:
            return 1
        Z = Z - np.max(Z, axis=0)
        A = np.exp(Z) / sum(np.exp(Z))
        return A

In [49]:
class Accuracy:
    @staticmethod
    def multiClass(target, prediction):
        return np.argmax(target, axis=0) == np.argmax(prediction, axis=0)

In [50]:
class Error:
    @staticmethod
    def meanSquareError(target, predicted, isDerivation=False):
        if isDerivation:
            return 2 * (predicted - target) / np.size(target)
        loss = np.power(target - predicted, 2)
        accuracy = Accuracy.multiClass(target, predicted)
        return {'loss': loss, 'accuracy': accuracy}
    
    @staticmethod
    def crossEntropyLoss(target, predicted,  isDerivation=False):
        if isDerivation:
            return predicted - target
        loss = -target * np.log(predicted + 10 ** -100)
        accuracy = Accuracy.multiClass(target,predicted)
        return {'loss': loss, 'accuracy': accuracy}
    
    @staticmethod
    def hiddenError(target, predicted):
        return target.T.dot(predicted)

In [51]:
class DenseLayer:
    op = []
    bias = []
    weight = []
    output = []
    isInput=False
    noOfNodes = 0
    activation = None
    def __init__(self, inputSize=0, outputSize=0, activation=None,isInput=False,input=[]):
        '''
        inputSize -> no.of.input feature 
        outputSize -> no.of.output
        '''
        if isInput:
            self.output = input
            self.isInput = True
            self.noOfNodes = input.shape[0]
        else :
            self.noOfNodes = inputSize
            self.activation = activation
            self.weight = self.generateWeight(inputSize, outputSize)
            self.bias = self.generateWeight(inputSize,1)

    def generateWeight(self,*r):
        '''
        Receive input as set that define the set shape
        '''
        return np.random.randn(*r) - 0.5

In [52]:
class NeuralNetwork:
    input = []
    target = []
    layers = []
    history = {'loss': [], 'accuracy':[]}
    loss = None

    def __init__(self, input, target, loss):
        self.loss = loss
        self.input = input
        self.target = target
        self.layers.append(DenseLayer(None,None,None,isInput=True, input=self.input))

    def append(self, node, activationFunction):
        preNode = self.layers[len(self.layers) -1]
        self.layers.append(DenseLayer(inputSize=node,outputSize=preNode.noOfNodes, activation=activationFunction))

    def train(self, epoch=100, lr=0.01):
        for i in range(epoch):
            for j in range(len(self.layers)):
                if not self.layers[j].isInput:
                    self.layers[j].output = self.forWord(self.layers[j], self.layers[j-1].output)
                if j == len(self.layers)-1 :
                    loss = self.loss(self.target,self.layers[j].output)
                    self.history['loss'].append(np.mean(loss['loss']))
                    self.history['accuracy'].append(np.mean(loss['accuracy']))
                    # index = len(self.history['loss']) -1
                    print(f"Epoch: {i+1} Loss: {self.history['loss'][-1]} Accuracy: {self.history['accuracy'][-1]}")
                    # calculate loss
                    self.backPropogation(loss['loss'],lr)

    def forWord(self, layer, input):
        layer.op = np.dot(layer.weight, input) + layer.bias
        return layer.activation(layer.op)

    def backPropogation(self, loss,lr):
        layer_length = len(self.layers)
        for index, layer in enumerate(self.layers[::-1]):
            if not layer.isInput:
                pervious_node = self.layers[layer_length - index - 2]
                oldw = np.copy(layer.weight)
                layer.weight, layer.bias = self.gradient(
                    layer.weight, layer.bias, pervious_node.output, loss,lr)
                loss = Error.hiddenError(oldw, loss)

    def gradient(self, w, b, x, err, lr=0.01):
        w = w - (1/len(x[0]) * (err.dot(x.T))) * lr
        b = b - (lr * np.mean(err, axis=1).reshape(b.shape))
        return w,b        

In [53]:
x,y = getData()

In [54]:
model = None
model = NeuralNetwork(np.copy(x),np.copy(y),Error.crossEntropyLoss)

model.append(10, Activation.ReLU)
model.append(10,Activation.softmax)
model.train(100)

Epoch: 1 Loss: 0.29478940370192547 Accuracy: 0.09945238095238096
Epoch: 2 Loss: 0.29491318111664233 Accuracy: 0.09945238095238096
Epoch: 3 Loss: 0.2950373862164145 Accuracy: 0.09945238095238096
Epoch: 4 Loss: 0.29516385512119103 Accuracy: 0.09945238095238096
Epoch: 5 Loss: 0.2952929029524713 Accuracy: 0.09945238095238096
Epoch: 6 Loss: 0.2954235295969569 Accuracy: 0.09945238095238096
Epoch: 7 Loss: 0.2955545478081977 Accuracy: 0.09945238095238096
Epoch: 8 Loss: 0.2956875267785697 Accuracy: 0.09945238095238096
Epoch: 9 Loss: 0.2958303134367198 Accuracy: 0.09945238095238096
Epoch: 10 Loss: 0.29598257494962793 Accuracy: 0.09945238095238096
Epoch: 11 Loss: 0.2961474790923365 Accuracy: 0.09945238095238096
Epoch: 12 Loss: 0.2963312034787053 Accuracy: 0.09945238095238096
Epoch: 13 Loss: 0.2965283425970812 Accuracy: 0.09945238095238096
Epoch: 14 Loss: 0.2967468825640848 Accuracy: 0.09942857142857142
Epoch: 15 Loss: 0.2969996999411071 Accuracy: 0.09942857142857142
Epoch: 16 Loss: 0.297290632573

In [55]:
model1 = NeuralNetwork(np.copy(x),np.copy(y),Error.meanSquareError)

model1.append(10, Activation.ReLU)
model1.append(10,Activation.softmax)
model1.train(10, 0.1)

Epoch: 1 Loss: 0.11676525320290639 Accuracy: 0.09838095238095237


ValueError: operands could not be broadcast together with shapes (10,10) (784,10) 

# Debug

In [56]:
def generateWeight(*r):
    '''
    Receive input as set that define the set shape
    '''
    return np.random.randn(*r) - 0.5

In [57]:
hw = generateWeight(10,784)
hb = generateWeight(10,1)

ow = generateWeight(10,10)
ob = generateWeight(10,1)

In [58]:
def forWord(x,w,b):
    return np.dot(w, x) + b

def backWord(err, w,b, x,lr = 0.01):
    m = 1/len(x[0])
    w = w - (m * (err.dot(x.T))) * lr
    b = b - (m * np.sum(err)) * lr
    # b = b - (lr * np.mean(err, axis=1).reshape(b.shape))
    return w, b

In [60]:
for i in range(50):
    
    #forword
    #Hidden
    # hid_op = forWord(x,hw,hb)
    hid_op = hw.dot(x)+hb
    hid_act = Activation.ReLU(hid_op)

    #op layer
    op = ow.dot(hid_act)+ob
    # op = forWord(hid_act,ow,ob)
    y_pred = Activation.softmax(op)

    #backword
    #output error
    op_err = y_pred - y
    # ow,ob = backWord(op_err, ow, ob, hid_act)
    ow = ow - (1/len(x[0]) * op_err.dot(hid_op.T)) * 0.01
    ob = ob - (1/len(x[0]) * np.sum(op_err)) * 0.01

    # #hidden error
    hid_err = Error.hiddenError(ow,op_err) * Activation.ReLU(hid_op,True)
    # hw,hb = backWord(hid_err, hw, hb, x)
    hw = hw - (1/len(x[0]) * hid_err.dot(x.T)) * 0.01
    hb = hb - (1/len(x[0]) * np.sum(hid_err)) * 0.01

    loss = Error.crossEntropyLoss(y, y_pred)
    print(f"Epoch : {i + 1}, Loss : {np.mean(loss['loss'])}, Accuracy : {np.mean(loss['accuracy'])}")

Epoch : 1, Loss : 0.29245212766522855, Accuracy : 0.09035714285714286
Epoch : 2, Loss : 0.2924522499608775, Accuracy : 0.09035714285714286
Epoch : 3, Loss : 0.29245237401354474, Accuracy : 0.09035714285714286
Epoch : 4, Loss : 0.29245249980705285, Accuracy : 0.09035714285714286
Epoch : 5, Loss : 0.2924526273244784, Accuracy : 0.09035714285714286
Epoch : 6, Loss : 0.2924527565481789, Accuracy : 0.09035714285714286
Epoch : 7, Loss : 0.2924528874598211, Accuracy : 0.09035714285714286
Epoch : 8, Loss : 0.2924530200404109, Accuracy : 0.09035714285714286
Epoch : 9, Loss : 0.29245315427032387, Accuracy : 0.09035714285714286
Epoch : 10, Loss : 0.2924532901293381, Accuracy : 0.09035714285714286
Epoch : 11, Loss : 0.29245342759666676, Accuracy : 0.09035714285714286
Epoch : 12, Loss : 0.29245356665099254, Accuracy : 0.09035714285714286
Epoch : 13, Loss : 0.29245370727050224, Accuracy : 0.09035714285714286
Epoch : 14, Loss : 0.2924538494329219, Accuracy : 0.09035714285714286
Epoch : 15, Loss : 0.2

In [None]:
np.argmax(y_pred,0), np.argmax(y_pred,0).shape

(array([3, 3, 3, ..., 3, 3, 3]), (42000,))

In [None]:
np.argmax(y,0), np.argmax(y,0).shape

(array([1, 0, 1, ..., 7, 6, 9]), (42000,))

In [None]:
len(x[0])

42000