In [1]:
import numpy as np
import random
from matplotlib import pyplot as plt
from keras.datasets import fashion_mnist
from tqdm import tqdm
import math

In [None]:
!pip install cupy

In [2]:
class PreProc:
    def __init__(self):
        (self.trainx,self.trainy),(self.testx, self.testy) = fashion_mnist.load_data()
    def visualize(self,n):
        for i in range(n):
            plt.subplot(330+1+i) # ask someone why??
            plt.imshow(self.trainx[i], cmap = plt.get_cmap('gray'))
        plt.show()
    def flatten(self):
        trainx_flattened = self.trainx
        testx_flattened = self.testx
        trainx_flattened.shape = (60000,784)
        testx_flattened.shape = (10000,784)
        return trainx_flattened,testx_flattened
    def getLabels(self):
        return self.trainy, self.testy
    def getInputSize(self):
        return len(self.trainx[0])

In [None]:
import sys

In [20]:
class Functions:
    @staticmethod
    def sigmoid(input):
        input = np.clip(input, -100,100)
        return  1.0/(1.0+np.exp(-input))
    @staticmethod
    def softmax(input):
        input = np.clip(input, -100,100)
        return np.exp(input)/(np.sum(np.exp(input)))
    @staticmethod
    def onehot(input):
        result = np.zeros(10)
        result[input] = 1
        return result
    @staticmethod
    def crossEntropyLoss(y,yHat):
        loss = (-1/10.0) * np.sum(np.multiply(y, np.log(yHat+1e-10)) + np.multiply((1 - y), np.log(1 - (yHat+1e-10))))
        return loss
    @staticmethod
    def derivative_sigmoid(input):
        return Functions.sigmoid(input)*(1-Functions.sigmoid(input))
    def plot(input):
        plt.plot(input)
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title("Loss over iterations")

In [9]:
Functions.sigmoid(0)

0.5

In [23]:
class Algorithms:
    @staticmethod
    def ForwardProp(net, activate, output, inputLayer):
        L = len(net)-1
        a = []
        h = []
        weights = net[0][:,:len(net[0][0])-1]
        bias = net[0][:,len(net[0][0])-1]
        temp = np.matmul(weights,inputLayer)+bias
        a.append(temp)
        h.append(activate(a[0]))
        for k in range(1,L):
            weights = net[k][:,:len(net[k][0])-1]
            bias = net[k][:,len(net[k][0])-1]
            temp = np.matmul(weights,h[k-1])+bias
            a.append(bias + np.matmul(weights,h[k-1]))
            h.append(activate(a[k]))
        weights = net[L][:,:len(net[L][0])-1]
        bias = net[L][:,len(net[L][0])-1]
        temp = np.matmul(weights,h[L-1])+bias
        a.append(temp)
        h.append(output(a[L]))
        return a,h
    @staticmethod
    def BackProp(net, a, h, dataPoint, dataLabel):
        L = len(net)-1
        gradaL = -(Functions.onehot(dataLabel)-h[len(h)-1])
        gradient = np.zeros_like(net)
        for k in range(L,0,-1):
            gradW = np.outer(gradaL,h[k-1].T)
            gradB = gradaL
            gradB.resize((len(gradB),1))
            grad = np.append(gradW,gradB,axis=1)
            gradient[k] = grad

            gradhL_1 = np.matmul(np.transpose(net[k][:,len(net[k])-1]),gradaL)
            gradaL_1 = np.multiply(gradhL_1, Functions.derivative_sigmoid(a[k-1]))
            gradaL = gradaL_1
        gradW = np.outer(gradaL,dataPoint.T)
        gradB = gradaL
        gradB.resize((len(gradB),1))
        grad = np.append(gradW,gradB,axis=1)
        gradient[0] = grad
        return gradient

    @staticmethod
    def mGD(net, learningRate, train, label, activate, output):
        beta = 0.9
        prevGrad = np.zeros_like(net)
        for i in range(len(train)):
            a,h = Algorithms.ForwardProp(net, activate, output, train[i])
            gradient = Algorithms.BackProp(net, a, h, train[i], label[i])
            gradient += beta*prevGrad + learningRate*gradient
        gradient /= len(train)
        net = net - gradient
        return net

    @staticmethod
    def miniBatchMGD(net, batchSize, learningRate, dataPoints, dataLabels):
        batchSize = 32
        gradient = np.zeros_like(net)
        lossTrack = []
        beta = 0.9
        prevGrad = np.zeros_like(net)
        for epoch in tqdm(range(15)):
            indices = np.arange(len(dataPoints))
            np.random.shuffle(indices)
            batchX = dataPoints[indices]
            batchY = dataLabels[indices]
            for i in range(math.ceil(len(dataPoints)/batchSize)):
                trainer = batchX[i*batchSize:i*batchSize+batchSize]
                labeler = batchY[i*batchSize:i*batchSize+batchSize]
                batchLoss = 0.0
                gradient = 0.0
                for data in range(batchSize):
                    a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, trainer[data])
                    currGrad = Algorithms.BackProp(net, a, h, trainer[data], labeler[data])
                    batchLoss += Functions.crossEntropyLoss(Functions.onehot(labeler[data]), h[-1])
                    gradient += beta*prevGrad + (1-beta)*currGrad
                batchLoss /= batchSize
                gradient /= batchSize
                prevGrad = gradient
                net = net - learningRate*gradient 
                lossTrack.append(batchLoss)
            print("The loss after this epoch is: "+ str(batchLoss))
        return net

    @staticmethod
    def miniBatchADAM(net, batchSize, learningRate, dataPoints, dataLabels):
        batchSize = 32
        epsilon = 1e-5
        gradient = np.zeros_like(net)
        lossTrack = []
        beta2 = 0.9
        beta1 = 0.9
        prevGrad = np.zeros_like(net)
        for epoch in tqdm(range(15)):
            indices = np.arange(len(dataPoints))
            np.random.shuffle(indices)
            batchX = dataPoints[indices]
            batchY = dataLabels[indices]
            for i in range(math.ceil(len(dataPoints)/batchSize)):
                trainer = batchX[i*batchSize:i*batchSize+batchSize]
                labeler = batchY[i*batchSize:i*batchSize+batchSize]
                batchLoss = 0.0
                gradient = 0.0
                moment1 = 0.0
                moment2 = 0.0
                for data in range(batchSize):
                    a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, trainer[data])
                    currGrad = Algorithms.BackProp(net, a, h, trainer[data], labeler[data])
                    batchLoss += Functions.crossEntropyLoss(Functions.onehot(labeler[data]), h[-1])
                    gradient += currGrad
                moment1 = beta1*moment1 + (1-beta1)*gradient
                moment2 = beta2*moment2 + (1-beta2)*gradient**2
                moment1 = moment1/(1 - beta1**(epoch+1))
                moment2 = moment2/(1 - beta2**(epoch+1))
                denom = 0.0
                for i in moment2:
                    denom += np.linalg.norm(i)**2
                temp = (learningRate/(np.sqrt(denom)+epsilon))*moment1
                batchLoss /= batchSize
                gradient /= batchSize
                prevGrad = gradient
                net = net - temp*gradient 
                lossTrack.append(batchLoss)
            print("The loss after this epoch is: "+ str(batchLoss))
        return net

    def miniBatchNAG(net, batchSize, learningRate, dataPoints, dataLabels):
        batchSize = 32
        gradient = np.zeros_like(net)
        lossTrack = []
        beta = 0.9
        prevGrad = np.zeros_like(net)
        for epoch in tqdm(range(15)):
            indices = np.arange(len(dataPoints))
            np.random.shuffle(indices)
            batchX = dataPoints[indices]
            batchY = dataLabels[indices]
            for i in range(math.ceil(len(dataPoints)/batchSize)):
                trainer = batchX[i*batchSize:i*batchSize+batchSize]
                labeler = batchY[i*batchSize:i*batchSize+batchSize]
                batchLoss = 0.0
                gradient = 0.0
                for data in range(batchSize):
                    a,h = Algorithms.ForwardProp(net-prevGrad, Functions.sigmoid, Functions.softmax, trainer[data])
                    currGrad = Algorithms.BackProp(net-prevGrad, a, h, trainer[data], labeler[data])
                    batchLoss += Functions.crossEntropyLoss(Functions.onehot(labeler[data]), h[-1])
                    gradient += beta*prevGrad + (1-beta)*currGrad
                batchLoss /= batchSize
                gradient /= batchSize
                prevGrad = gradient
                net = net - learningRate*gradient 
                lossTrack.append(batchLoss)
            print("The loss after this epoch is: "+ str(batchLoss))
        return net
    
    @staticmethod
    def RMSProp(net, batchSize, learningRate, dataPoints, dataLabels):
        gradient = np.zeros_like(net, dtype = np.float32)
        lossTrack = []
        beta = 0.5
        epsilon = 0.000001
        learnerRate = np.full_like(net, learningRate)
        prevGrad = np.zeros_like(net)
        for epoch in tqdm(range(15)):
            indices = np.arange(len(dataPoints))
            np.random.shuffle(indices)
            batchX = dataPoints[indices]
            batchY = dataLabels[indices]
            for i in range(math.ceil(len(dataPoints)/batchSize)):
                trainer = batchX[i*batchSize:i*batchSize+batchSize]
                labeler = batchY[i*batchSize:i*batchSize+batchSize]
                batchLoss = 0.0
                gradient = 0.0
                v_t_1 = 0.0
                for data in range(batchSize):
                    a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, trainer[data])
                    currGrad = Algorithms.BackProp(net, a, h, trainer[data], labeler[data])
                    batchLoss += Functions.crossEntropyLoss(Functions.onehot(labeler[data]), h[-1])
                    gradient += currGrad
                batchLoss /= batchSize
                gradient /= batchSize
                prevGrad = gradient
                v_t = v_t_1*beta + (1-beta)*gradient**2
                v_t_1 = v_t
                temp = np.zeros_like(v_t)
                for i in range(len(v_t)):
                    temp[i] = np.sqrt(v_t[i])
                net = net - ((learnerRate)/(temp + epsilon))*gradient 
                lossTrack.append(batchLoss)
            print("The loss after this epoch is: "+ str(batchLoss))
        return net

    @staticmethod
    def miniBatchGD(net, batchSize, learningRate, dataPoints, dataLabels):
        batchSize = 32
        gradient = np.zeros_like(net)
        lossTrack = []
        for epoch in tqdm(range(15)):
            indices = np.arange(len(dataPoints))
            np.random.shuffle(indices)
            batchX = dataPoints[indices]
            batchY = dataLabels[indices]
            for i in range(math.ceil(len(dataPoints)/batchSize)):
                trainer = batchX[i*batchSize:i*batchSize+batchSize]
                labeler = batchY[i*batchSize:i*batchSize+batchSize]
                batchLoss = 0.0
                for data in range(batchSize):
                    a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, trainer[data])
                    currGrad = Algorithms.BackProp(net, a, h, trainer[data], labeler[data])
                    batchLoss += Functions.crossEntropyLoss(Functions.onehot(labeler[data]), h[-1])
                    gradient += currGrad
                batchLoss /= batchSize
                gradient /= batchSize
                net = net - learningRate*gradient 
                lossTrack.append(batchLoss)
            print("The loss after this epoch is: "+ str(batchLoss))
        return net
    
    @staticmethod
    def adaGrad(net, batchSize, learningRate, dataPoints, dataLabels):
        batchSize = 32
        gradient = np.zeros_like(net)
        lossTrack = []
        for epoch in tqdm(range(15)):
            indices = np.arange(len(dataPoints))
            np.random.shuffle(indices)
            batchX = dataPoints[indices]
            batchY = dataLabels[indices]
            for i in range(math.ceil(len(dataPoints)/batchSize)):
                trainer = batchX[i*batchSize:i*batchSize+batchSize]
                labeler = batchY[i*batchSize:i*batchSize+batchSize]
                batchLoss = 0.0
                v_t_1 = 0
                for data in range(batchSize):
                    a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, trainer[data])
                    currGrad = Algorithms.BackProp(net, a, h, trainer[data], labeler[data])
                    batchLoss += Functions.crossEntropyLoss(Functions.onehot(labeler[data]), h[-1])
                    gradient += currGrad
                batchLoss /= batchSize
                gradient /= batchSize
                v_t = v_t_1 + gradient**2
                denom = 0.0
                for val in v_t:
                    denom += np.linalg.norm(val)
                net = net - (learningRate)/(np.sqrt(denom)+0.00001)*gradient 
                lossTrack.append(batchLoss)
            print("The loss after this epoch is: "+ str(batchLoss))
        return net
    
    @staticmethod
    def evaluateNetwork(net,test_x, test_y):
        num_acc = 0
        for i in range(len(test_x)):
            a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, test_x[i])
            h = np.array(h, dtype = object)
            predY =   np.argmax(h[len(h)-1])
            if test_y[i] == predY:
                num_acc+=1
        print(num_acc/len(test_y))

In [None]:
len(h)

In [None]:
for i in range(L,0,-1):
    print(i, end = " ")

In [None]:
#The class of FeedForwardNeuralNetwor

class FFNet:
    #constructor
    def __init__(self,number_of_hidden_layers, number_of_inputs, number_of_outputs):
        self.number_of_inputs = number_of_inputs
        self.number_of_hidden_layers = number_of_hidden_layers
        self.number_of_outputs = number_of_outputs
        self.input = [0 for i in range(number_of_inputs)]
        self.output = [0 for i in range(10)]
        self.weights = []
        self.biases = []
        #self.hidden.append(np.random.random((number_of_inputs+1)))
    
    #Method for creating layers
    def addHiddenLayer(self,number_of_neurons):
        if(len(self.hidden) == 0):
            temp_weights = np.random.random((number_of_neurons, self.number_of_inputs))
            temp_biases = np.random.random((number_of_neurons))
        else:
            prev_neurons = len(self.hidden[len(self.hidden) - 1])
            temp_weights = np.random.random((number_of_neurons, prev_neurons))
            temp_biases = np.random.random((number_of_neurons))
        temp_weights = temp_weights/np.linalg.norm(temp_weights)
        temp_biases = temp_biases/np.linalg.norm(temp_biases)
        self.weights.append(temp_weights)
        self.biases.append(temp_biases)
    
    def addOutputLayer(self, number_of_outputs):
        if(len(self.hidden) == 0):
            #print("number of inputs: "+str(self.number_of_inputs))
            temp_weights = np.random.random((number_of_outputs, self.number_of_inputs))
            temp_biases = np.random.random((number_of_outputs))
        else:
            prev_neurons = len(self.hidden[len(self.hidden) - 1])
            temp_weights = temp_weights/np.linalg.norm(temp_weights)
            temp_biases = np.random.random((number_of_outputs))
        self.weights.append(temp_weights)
        self.biases.append(temp_biases)

    def solidify(self):
        self.weights = np.array(self.weights, dtype = object)
        self.biases = np.array(self.biases, dtype = object)

    def getNetwork(self):
        return self.network
    
    def ForwardProp(self, activate, output, inputLayer):
        return Algorithms.ForwardProp(self.network, activate, output, inputLayer)
    
    def lossCalc(self, lossFunction, Y):
        predY = self.historyA[(len(self.historyA)-1)]
        return lossFunction(Y,self.predY)

    def BackProp(self, a, h, dataPoint, dataLabel):
        return Algorithms.BackProp(self.network, a, h, dataPoint, dataLabel)

    def gradientDescent(self, learningRate, lossFunction, activate, output, dataPoints, dataLabels):
        losses = []
        for i in range(5):
            gradient = 0.0
            loss = 0
            for index in tqdm(range(len(dataPoints))):
                a,h = Algorithms.ForwardProp(self.network, activate, output, dataPoints[index])
                predY = h[(len(h)-1)]

                loss += Functions.crossEntropyLoss(dataLabels[index], predY)
                gradient+= Algorithms.BackProp(self.network, a, h, dataPoints[index], dataLabels[index])
            losses.append(loss)
            #gradient /= 60000
            self.network = self.network - learningRate * gradient
        return self.network, losses
    def stochGradientDescent(self, learningRate, lossFunction, activate, output, dataPoints, dataLabels):
        #losses = []
        net = self.network
        for i in range(3):
            for i in tqdm(range(len(dataPoints))):
                a,h = Algorithms.ForwardProp(net,Functions.sigmoid, Functions.softmax, dataPoints[i])
                gradient = Algorithms.BackProp(net, a,h,dataPoints[i], dataLabels[i])
                net = net - 0.1*gradient
        return self.network
                
    

In [7]:
#The class of FeedForwardNeuralNetwor

class FFNet:
    #constructor
    def __init__(self,number_of_hidden_layers, number_of_inputs, number_of_outputs):
        self.number_of_inputs = number_of_inputs
        self.number_of_hidden_layers = number_of_hidden_layers
        self.number_of_outputs = number_of_outputs
        self.input = [0 for i in range(number_of_inputs)]
        self.output = [0 for i in range(10)]
        self.hidden = []
        #self.hidden.append(np.random.random((number_of_inputs+1)))
    
    #Method for creating layers
    def addHiddenLayer(self,number_of_neurons):
        if(len(self.hidden) == 0):
            temp_weights = np.random.random((number_of_neurons, self.number_of_inputs+1)) #The +1 is for biases.
        else:
            prev_neurons = len(self.hidden[len(self.hidden) - 1])
            temp_weights = np.random.random((number_of_neurons, prev_neurons + 1)) # The +1 is for biases.
        temp_weights = temp_weights/np.linalg.norm(temp_weights)
        self.hidden.append(temp_weights)
    
    def addOutputLayer(self, number_of_outputs):
        if(len(self.hidden) == 0):
            #print("number of inputs: "+str(self.number_of_inputs))
            temp_weights = np.random.random((number_of_outputs, self.number_of_inputs)) #Bias not needed for the output layer.
        else:
            prev_neurons = len(self.hidden[len(self.hidden) - 1])
            temp_weights = np.random.random((number_of_outputs, prev_neurons + 1)) #Bias not needed for the output layer.
        temp_weights = temp_weights/np.linalg.norm(temp_weights)
        self.hidden.append(temp_weights)

    def solidify(self):
        self.network = np.array(self.hidden, dtype = object)

    def getNetwork(self):
        return self.network
    
    def ForwardProp(self, activate, output, inputLayer):
        return Algorithms.ForwardProp(self.network, activate, output, inputLayer)
    
    def lossCalc(self, lossFunction, Y):
        predY = self.historyA[(len(self.historyA)-1)]
        return lossFunction(Y,self.predY)

    def BackProp(self, a, h, dataPoint, dataLabel):
        return Algorithms.BackProp(self.network, a, h, dataPoint, dataLabel)

    def gradientDescent(self, learningRate, lossFunction, activate, output, dataPoints, dataLabels):
        losses = []
        for i in range(5):
            gradient = 0.0
            loss = 0
            for index in tqdm(range(len(dataPoints))):
                a,h = Algorithms.ForwardProp(self.network, activate, output, dataPoints[index])
                predY = h[(len(h)-1)]

                loss += Functions.crossEntropyLoss(dataLabels[index], predY)
                gradient+= Algorithms.BackProp(self.network, a, h, dataPoints[index], dataLabels[index])
            losses.append(loss)
            #gradient /= 60000
            self.network = self.network - learningRate * gradient
        return self.network, losses
    def stochGradientDescent(self, learningRate, lossFunction, activate, output, dataPoints, dataLabels):
        #losses = []
        net = self.network
        for i in range(3):
            for i in tqdm(range(len(dataPoints))):
                a,h = Algorithms.ForwardProp(net,Functions.sigmoid, Functions.softmax, dataPoints[i])
                gradient = Algorithms.BackProp(net, a,h,dataPoints[i], dataLabels[i])
                net = net - 0.1*gradient
        return self.network
                
    

In [None]:
class Parallelizer:
    @staticmethod
    @cuda.jit
    def entireDataSet(net, dataPoints, dataLabels, activate, output, learningRate, answer, losses):
        index = cuda.grid()
        a,h = ParallelAlgorithms.ForwardProp(net, activate, output, dataPoints[index])
        predY = h[(len(h)-1)]

        loss = Functions.crossEntropyLoss(dataLabels[index], predY)
        losses[index] = (loss)
        gradient = ParallelAlgorithms.BackProp(net, a, h, dataPoints[index], dataLabels[index])
        cuda.atomic.add(answer,0,gradient,answer)
        


In [27]:
if __name__ == '__main__':
    data = PreProc()
    #data.visualize(5)
    train_x, test_x = data.flatten()
    train_y, test_y = data.getLabels()
    trainx = train_x/255
    testx = test_x/255
    neuralNet = FFNet(0,data.getInputSize(), 10)
    neuralNet.addHiddenLayer(128)
    neuralNet.addHiddenLayer(128)
    neuralNet.addHiddenLayer(128)
    neuralNet.addOutputLayer(10)
    neuralNet.solidify()
    net = neuralNet.getNetwork()
    net = Algorithms.miniBatchGD(net, 100, 0.1, trainx, train_y)
    #Algorithms.evaluateNetwork(net,testx,test_y)

  7%|██▊                                        | 1/15 [03:43<52:14, 223.88s/it]

The loss after this epoch is: 0.32440024092462694


 13%|█████▋                                     | 2/15 [04:14<23:50, 110.01s/it]

The loss after this epoch is: 0.2904860150882165


 20%|████████▊                                   | 3/15 [04:43<14:40, 73.34s/it]

The loss after this epoch is: 0.26754650009257924


 27%|███████████▋                                | 4/15 [05:14<10:21, 56.47s/it]

The loss after this epoch is: 0.24244075090449865


 33%|██████████████▋                             | 5/15 [05:44<07:48, 46.87s/it]

The loss after this epoch is: 0.23071834449796919


 40%|█████████████████▌                          | 6/15 [06:14<06:10, 41.15s/it]

The loss after this epoch is: 0.21912279169502918


 47%|████████████████████▌                       | 7/15 [06:44<05:00, 37.62s/it]

The loss after this epoch is: 0.21777234347610336


 53%|███████████████████████▍                    | 8/15 [07:15<04:07, 35.37s/it]

The loss after this epoch is: 0.21668532485532507


 60%|██████████████████████████▍                 | 9/15 [07:45<03:23, 33.91s/it]

The loss after this epoch is: 0.19497692747464007


 67%|████████████████████████████▋              | 10/15 [08:15<02:43, 32.62s/it]

The loss after this epoch is: 0.18754285269982082


 73%|███████████████████████████████▌           | 11/15 [08:46<02:08, 32.05s/it]

The loss after this epoch is: 0.19250813901429287


 80%|██████████████████████████████████▍        | 12/15 [09:16<01:34, 31.57s/it]

The loss after this epoch is: 0.1901191438153988


 87%|█████████████████████████████████████▎     | 13/15 [09:47<01:02, 31.25s/it]

The loss after this epoch is: 0.17000313838231434


 93%|████████████████████████████████████████▏  | 14/15 [10:17<00:30, 30.97s/it]

The loss after this epoch is: 0.17393587493114354


100%|███████████████████████████████████████████| 15/15 [10:47<00:00, 43.19s/it]

The loss after this epoch is: 0.1621468209220023





NameError: name 'Algorithms' is not defined

In [None]:
np.sqrt(v_t)

In [28]:
Algorithms.evaluateNetwork(net,testx,test_y)


0.5604


In [None]:
a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, train_x[500])

In [None]:
print(net)

In [None]:

batchSize = 32
gradient = np.zeros_like(net)
lossTrack = []
for epoch in tqdm(range(15)):
    indices = np.arange(len(trainx))
    np.random.shuffle(indices)
    batchX = trainx[indices]
    batchY = train_y[indices]
    for i in range(math.ceil(len(trainx)/batchSize)):
        trainer = batchX[i*batchSize:i*batchSize+batchSize]
        labeler = batchY[i*batchSize:i*batchSize+batchSize]
        batchLoss = 0.0
        for data in range(batchSize):
            a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, trainer[data])
            currGrad = Algorithms.BackProp(net, a, h, trainer[data], labeler[data])
            batchLoss += Functions.crossEntropyLoss(Functions.onehot(labeler[data]), h[-1])
            gradient += currGrad
        batchLoss /= 32
        gradient /= 32
        net = net - 0.01*gradient 
        lossTrack.append(batchLoss)
    print("The loss after this epoch is: "+ str(batchLoss))

In [None]:
net

In [None]:
gradient = neuralNet.BackProp(a,h,train_x[1], train_y[1])


In [None]:
net = net - gradient*0.1

In [None]:
net

In [None]:
np.argmax(np.array(h[L]))

In [None]:
while np.argmax(np.array(h[L])):
    for i in range(10):
        a,h = Algorithms.ForwardProp(net,Functions.sigmoid, Functions.softmax, train_x[4])
        gradient = neuralNet.BackProp(a,h,train_x[4], train_y[4])
        net = net - 0.1*gradient
        print(np.argmax(np.array(h[L])), end = ", "),

In [None]:
train_y[4]

In [None]:
num_acc = 0
for i in range(len(test_x)):
    a,h = Algorithms.ForwardProp(net, Functions.sigmoid, Functions.softmax, test_x[i])
    h = np.array(h)
    predY =   np.argmax(h[len(h)-1])
    print(predY)
    if test_y[i] == predY:
        num_acc+=1
print(num_acc/len(test_y), end = " ")

In [None]:
len(test_y)

In [None]:
gradient[0].shape

In [None]:
for i in gradient[0]:
    print(i, end = " ")

In [None]:
gradient[0]

In [None]:
gradaL = -(Functions.onehot(train_y[1])-h[len(h)-1])

In [None]:
gradhL_1 = np.matmul(np.transpose(net[(len(net)-1)]),aL)

In [None]:
gradaL_1 = np.multiply(net[len(net)-1][:,:len(net[len(net)-1][0])-1], Functions.derivative_sigmoid(a[len(net)-2]))

In [None]:
gradW = np.outer(gradaL,h[len(net)-2].T)
gradB = gradaL


In [None]:
gradB.resize((len(gradB),1))

In [None]:
gradB.shape

In [None]:
np.append(gradW,gradB.resize((10,1)),axis=1)

In [None]:
gradaL.shape

In [None]:
a[len(net)-2].shape

In [None]:
weights = net[0][:,:len(net[0][0])-1]
bias = net[0][:,len(net[0][0])-1]
temp = np.matmul(weights,train_x[0])+bias
temp = temp/np.linalg.norm(temp)
a = []
a.append(temp)
h = []
h.append(Functions.sigmoid(a[0]))

In [None]:
weights = net[L][:,:len(net[L][0])-1]
bias = net[L][:,len(net[L][0])-1]
temp = np.matmul(weights,h[0])+bias
temp = temp/np.linalg.norm(temp)

In [None]:
L = len(net)-1

In [None]:
weights.shape

In [None]:
#The class of FeedForwardNeuralNetwor

class FFNet:
    #constructor
    hidden = []
    input = []
    output = []
    def __init__(self,number_of_hidden_layers, number_of_inputs, number_of_outputs):
        self.number_of_inputs = number_of_inputs
        self.number_of_hidden_layers = number_of_hidden_layers
        self.number_of_outputs = number_of_outputs
        #At the same time, the layers input layers mus also be initialized.

        input = [0 for i in range(number_of_inputs)]
        output = [0 for i in range(number_of_outputs)]
        hidden = [[]]

        #input and output layers are nothing but simple lists
    
    #Method for creating layers
    def add_hidden_layer(number_of_neurons):
        temp_weights = [0 for i in range(number_of_neurons+1)] #The +1 is for bias values
        hidden.append(temp_weights)
    
    def backward_propagate(a,h, pred_y):
        delthet[L] = -(exp(y) - pred_y) #with respect to output layer
        for k in range(0,L-1,-1):
            delthetw = np.matmul(delthet[k], h[k-1].T)
            delthetb = delthet[k]
            deltheth = np.matmul(weights[k].T, delthet[k])
            delthet[k-1] = hadamard(deltheth, preac(a)) 

    def forward_propagate():
        #here, we are calculating the preactivations and activations.
        #we then store them in an array and return it.
        
        for k in range(number_of_levels-1):
            a[k] = biases[k] + np.matmul(weights[k], h[k-1])
            h[k] = g(a[k])
        a[number_of_levels-1] = biases[number_of_levels] + np.matmul(weights[number_of_levels],h[number_of_levels-1])
        pred_y = output(a[number_of_levels-1])
        return a,h, pred_y


    def gradient_descent():
        a,h, pred_y = forward_propagate()
        delthet = backward_propagate(a,h, pred_y)
        thet += delthet

    def fit(dataset):
        for x,y in dataset:
            loss = forward(x,y)
            delthet = backward(loss)
            thet += learn_rate*delthet
    