In [33]:
import numpy as np
import random
from matplotlib import pyplot as plt
from keras.datasets import fashion_mnist
from tqdm import tqdm_notebook as tqdm
import math
import cv2 as cv
import wandb

In [32]:
class PreProc:
    '''Class used for preprocessing all images.
        Constructing an instance immediately loads the dataset through
        fashion_mnist.load_data() and keeps the four returned arrays on the instance.

        visualize(n) shows the first n training images in a matplotlib grid.

        flattenAndCentralize() returns mean-centred float64 copies of the training and
        testing images, each image flattened into one row.

        getLabels() returns the training and testing labels.

        getInputSize() returns the length of the first axis of the first training image.
    '''
    def __init__(self):
        (self.trainx,self.trainy),(self.testx, self.testy) = fashion_mnist.load_data()

    def visualize(self,n):
        ''' args -> n :: The number of images desired to be visualized
            returns-> null

            shows the first n training images via matplotlib. Up to 9 images are laid
            out on a 3x3 grid; for larger n the grid grows to the smallest square that
            holds them. n is clamped to the number of available training images.
        '''
        count = max(0, min(n, len(self.trainx)))
        # subplot(rows, cols, index) replaces the packed 3-digit form (330+1+i), which
        # can only address a 3x3 grid and breaks as soon as n exceeds 9.
        side = max(3, math.ceil(math.sqrt(count)))
        for i in range(count):
            plt.subplot(side, side, i + 1)
            plt.imshow(self.trainx[i], cmap = plt.get_cmap('gray'))
        plt.show()

    @staticmethod
    def _centreAndFlatten(images):
        ''' args -> images :: array of images, one image per entry along axis 0
            returns -> float64 copy with the per-pixel mean (taken over axis 0)
                       subtracted and every image flattened into a single row
        '''
        # np.array(...) always copies, so the arrays stored on the instance stay untouched.
        centred = np.array(images, dtype = 'float64')
        centred -= np.mean(centred, axis = 0)
        # Infer the row width from the data instead of hard-coding 784 / 10000.
        return centred.reshape(len(centred), -1)

    def flattenAndCentralize(self):
        ''' args -> none
            returns -> trainx_flattened :: The training images, mean centered and flattened
                        into a 1 dimensional array per image
                    -> testx_flattened :: The testing images, mean centered and flattened
                        into a 1 dimensional array per image

            Each split is centred with its own per-pixel mean.
            NOTE: no blurring is applied. A per-image cv.GaussianBlur loop used to sit
            here, but it only rebound its loop variable and therefore never changed the
            returned data, so it is omitted.
        '''
        trainx_flattened = PreProc._centreAndFlatten(self.trainx)
        testx_flattened = PreProc._centreAndFlatten(self.testx)
        return trainx_flattened,testx_flattened

    def getLabels(self):
        ''' args -> none
            returns -> self.trainy :: The labels of the training data
                    -> self.testy :: The labels of the testing data
        '''
        return self.trainy, self.testy

    def getInputSize(self):
        ''' args -> none
            returns -> int :: len() of the first training image, i.e. the size of its
                       first axis (the images are stored unflattened on the instance)
        '''
        return len(self.trainx[0])

In [3]:
class Functions:
    ''' The Functions class/ Library stores static methods corresponding to all the functions
        To be used in the program/training/testing.
        The correct implementation of these is vital to the correct working of the neural net
        model
    '''

    @staticmethod
    def sigmoid(input):
        ''' args -> input :: the input value, a numpy array type to the sigmoid function
            return -> np.array :: the np array containing calculated sigmoid values (per input[i])
        '''
        # Clipping keeps np.exp finite; the sigmoid is saturated well inside +-100 anyway.
        clipped = np.clip(input, -100,100)
        return  1.0/(1.0+np.exp(-clipped))

    @staticmethod
    def reLU(input):
        ''' args -> input :: the input value, a numpy array type to the reLU function
            return -> np.array :: input where it is positive, 0.01*input elsewhere
                      (a leaky ReLU with slope 0.01 on the negative side)
        '''
        return np.maximum(0.01*input,input)

    @staticmethod
    def tanh(input):
        ''' args -> input :: the input value, a numpy array type to the tanh function
            return -> np.array :: the np array containing calculated tanh values (per input[i])
        '''
        return np.tanh(input)

    @staticmethod
    def identity(input):
        ''' args -> input :: the input value, a numpy array type to the identity function
            return -> the very same object that was passed in (no copy is made)
        '''
        return input

    @staticmethod
    def softmax(input):
        ''' args -> input :: the input value, a numpy array type to the softmax function
            return -> np.array :: exp(input) normalised by the sum over ALL entries of input

            The largest entry is subtracted before exponentiating. That leaves the result
            mathematically unchanged but keeps np.exp from overflowing. Clipping the
            logits instead would map every value above the clip limit to the same
            probability.
        '''
        scores = np.asarray(input, dtype = float)
        if scores.size == 0:
            return scores
        exps = np.exp(scores - np.max(scores))
        return exps/np.sum(exps)

    @staticmethod
    def derivative_softmax(input):
        ''' args -> input :: the input value, a numpy array type to the derivative of the softmax function
            return -> np.array :: s*(1-s) with s = softmax(input), computed elementwise
                      (only the diagonal terms of the softmax Jacobian)
        '''
        s = Functions.softmax(input)
        return s*(1-s)

    @staticmethod
    def onehot(input, num_classes = 10):
        ''' args -> input :: the index (class label) to be set to 1
            args -> num_classes :: length of the returned vector, 10 by default
            return -> np.array :: vector of zeros with a 1 at position input
        '''
        result = np.zeros(num_classes)
        result[input] = 1
        return result

    @staticmethod
    def crossEntropyLoss(y,yHat):
        ''' args -> y :: the target values (e.g. a one hot vector)
            args -> yHat :: the predicted probabilities
            return -> double :: -1/10 * sum(y*log(yHat) + (1-y)*log(1-yHat))

            yHat is clipped into [1e-10, 1-1e-10] before taking logs, so neither log(0)
            nor the log of a negative number can occur. Adding the epsilon to yHat inside
            log(1 - yHat) made the argument negative for yHat == 1 and returned NaN for
            a perfect prediction.
        '''
        eps = 1e-10
        probs = np.clip(yHat, eps, 1.0 - eps)
        # The 1/10 factor is kept as-is; it matches the 10 entries onehot() produces by default.
        loss = (-1/10.0) * np.sum(np.multiply(y, np.log(probs)) + np.multiply((1 - y), np.log(1 - probs)))
        return loss

    @staticmethod
    def mse(y,yHat):
        ''' args -> y :: the target values
            args -> yHat :: the predicted values
            return -> np.mean of the dot product of (y - yHat) with itself.
                      For 1 dimensional vectors the dot product is already a scalar, so
                      the result is the SUM of squared errors, not their average.
        '''
        diff = y - yHat
        return np.mean(np.dot(diff.T, diff))

    @staticmethod
    def derivative_sigmoid(input):
        ''' args -> input :: the input value, a numpy array type to the derivative of the sigmoid function
            return -> np.array :: the np array containing calculated derivative of sigmoid values (per input[i])
        '''
        s = Functions.sigmoid(input)
        return s*(1-s)

    @staticmethod
    def derivative_tanh(input):
        ''' args -> input :: the input value, a numpy array type to the derivative of the tanh function
            return -> np.array :: the np array containing calculated derivative of tanh values (per input[i])
        '''
        return (1 - (np.tanh(input)**2))

    @staticmethod
    def derivative_reLU(input):
        ''' args -> input :: the input value, a numpy array type to the derivative of the reLU function
            return -> np.array :: 1 where input > 0 and 0.01 elsewhere, matching the
                      leaky slope used by reLU
        '''
        return np.where(input > 0, 1, 0.01)


    @staticmethod
    def derivative_identity(input):
        ''' args -> input :: the input value, a numpy array type to the derivative of the identity function
            return -> the scalar 1 (not an array); it broadcasts against arrays of any shape
        '''
        return 1

    @staticmethod
    def plot(input):
        ''' args -> input :: the loss list to be plotted
            return -> null 
            Just show the matplotlib plots for the loss
        '''
        plt.plot(input)
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title("Loss over iterations")
        plt.show()

    @staticmethod
    def plotAccuracy(input):
        ''' args -> input :: the accuracy list to be plotted
            return -> null 
            Just show the matplotlib plots for the accuracy
        '''
        plt.plot(input)
        plt.xlabel("Epochs")
        plt.ylabel("val accuracy")
        plt.title("Train over iterations")
        plt.show()

In [29]:
class Algorithms:
    ''' The Algorithms class/ library contains functions crucial for
        the implementation of training and testing of the neural networks

        All these functions are static methods and therefore creation of an object instance
        of algorithms is unnecessary
    '''
    @staticmethod
    def ForwardProp(weights, bias, activate, output, inputLayer):
        '''
            args -> weights,bias :: The model to propagate through, one entry per layer
            args -> activate :: The activation Function applied to every layer but the last
            args -> output :: The function applied to the last layer, usually the softmax function
            args -> inputLayer :: The image upon which to Forward Prop

            return -> a,h :: The preactivation and activation lists, one entry per layer of the model.

            A network with a single layer gets `output` applied to that layer directly.
            Handling the last layer as a separate step after the first would push the
            input through weights[0] twice in that case and return two entries.
        '''
        last = len(weights) - 1
        a = []
        h = []
        for k in range(last + 1):
            # .T is a no-op on the 1-D activation vectors this network produces.
            layer_input = inputLayer if k == 0 else h[k-1].T
            a.append(np.matmul(weights[k], layer_input) + bias[k])
            h.append(output(a[k]) if k == last else activate(a[k]))
        return a,h

    @staticmethod
    def evaluateNetwork(weights, biases,activate, output, test_x, test_y):
        ''' 
            args -> weights,biases :: The model to be evaluated
            args -> activate :: The activation Function to be used
            args -> output :: usually the softmax function
            args -> test_x :: The images to classify, one per entry
            args -> test_y :: The expected label of every image in test_x

            return -> double :: the fraction of images whose argmax over the last
                      layer's output equals the label; 0.0 when test_y is empty
        '''
        total = len(test_y)
        if total == 0:
            return 0.0
        num_acc = 0
        for i in range(len(test_x)):
            _, h = Algorithms.ForwardProp(weights, biases, activate, output, test_x[i])
            # Only the final layer matters here; no need to wrap the whole list in an array.
            if test_y[i] == np.argmax(h[-1]):
                num_acc+=1
        return (num_acc/total)

In [30]:
# The feed-forward neural network class

class FFNet:
    ''' The neural Network class/library, has functions crucial to implementing the neural Network
        constructor initializes the network to adapt to the input layer size and also initializes the output layer size

        Layers are appended with addHiddenLayer / addOutputLayer; solidify() then packs the
        per-layer weight and bias arrays into numpy object arrays.
    '''
    #constructor
    def __init__(self,number_of_hidden_layers, number_of_inputs, number_of_outputs):
        ''' args -> number_of_hidden_layers :: stored on the instance only; layers are
                    actually created by the add*Layer calls
            args -> number_of_inputs :: width of the input vector
            args -> number_of_outputs :: width of the output vector
        '''
        self.number_of_inputs = number_of_inputs
        self.number_of_hidden_layers = number_of_hidden_layers
        self.number_of_outputs = number_of_outputs
        self.input = [0 for i in range(number_of_inputs)]
        # Sized from the argument instead of a fixed 10.
        self.output = [0 for i in range(number_of_outputs)]
        self.weights = []
        self.biases = []
        # Activation / output functions chosen by the most recent fit() call, if any.
        self._activate = None
        self._output = None

    def _appendLayer(self, number_of_neurons, initialization):
        ''' Create the weights and biases of one layer and append them to the network.

            The fan-in is the input width for the first layer and the row count of the
            previous weight matrix otherwise. "xavier" draws standard normal weights
            divided by sqrt(fan_in/2); any other value draws standard normal weights
            scaled by 0.01. Biases always start at 0.01.
        '''
        # After solidify() the containers are numpy arrays, which have no append().
        if isinstance(self.weights, np.ndarray):
            self.weights = list(self.weights)
        if isinstance(self.biases, np.ndarray):
            self.biases = list(self.biases)

        fan_in = self.number_of_inputs if len(self.weights) == 0 else len(self.weights[-1])
        if initialization == "xavier":
            layer_weights = np.random.randn(number_of_neurons, fan_in)/np.sqrt((fan_in)/2)
        else:
            layer_weights = np.random.randn(number_of_neurons, fan_in)*0.01

        self.weights.append(layer_weights)
        self.biases.append(np.full((number_of_neurons), 0.01))

    #Method for creating layers
    def addHiddenLayer(self,number_of_neurons, initialization):
        ''' args -> number_of_neurons :: The number of neurons to be added for this layer of the network
            args -> initialization :: The type of initialization used ("xavier" or anything else
                    for small random weights)

            return -> null
        '''
        self._appendLayer(number_of_neurons, initialization)

    def addOutputLayer(self, number_of_outputs, initialization):
        ''' To add the output layer
            args -> number_of_outputs :: The number of neurons in the output layer of the network
            args -> initialization :: The type of initialization used for this network layer

            Works for a network without hidden layers as well: the fan-in then falls
            back to the input width rather than to an unassigned local.
        '''
        self._appendLayer(number_of_outputs, initialization)

    @staticmethod
    def _asObjectArray(items):
        ''' Pack a sequence of arrays into a 1-D object array, one array per slot. '''
        # np.array(items, dtype=object) builds a multi-dimensional object array when all
        # shapes match and raises when only the leading dimensions match, so the slots
        # are filled one by one instead.
        packed = np.empty(len(items), dtype = object)
        for index, item in enumerate(items):
            packed[index] = item
        return packed

    def solidify(self):
        ''' convert the weight and bias lists into 1-D numpy object arrays (one entry per layer)'''
        self.weights = FFNet._asObjectArray(self.weights)
        self.biases = FFNet._asObjectArray(self.biases)

    def getNetwork(self):
        ''' returns the weights, biases of the network'''
        return self.weights,self.biases

    def ForwardProp(self, activate, output, inputLayer):
        ''' Forward Propagate the network on the given activation function, output function, and input layer

            return -> a,h :: whatever Algorithms.ForwardProp returns for this network
        '''
        return Algorithms.ForwardProp(self.weights, self.biases, activate, output, inputLayer)

    def lossCalc(self, lossFunction, Y):
        ''' calculate the loss function on the last entry of self.historyA

            args -> lossFunction :: callable taking (Y, prediction)
            args -> Y :: the expected output
        '''
        # NOTE(review): nothing in this class assigns self.historyA; it has to be set
        # by the caller before this method is used - confirm where it should come from.
        predY = self.historyA[(len(self.historyA)-1)]
        return lossFunction(Y,predY)

    def BackProp(self, a, h, dataPoint, dataLabel):
        '''call the back propagation'''
        # NOTE(review): self.network is not assigned anywhere in this class, so this
        # call depends on the attribute being provided externally - confirm the
        # argument list Algorithms.BackProp expects.
        return Algorithms.BackProp(self.network, a, h, dataPoint, dataLabel)

    def fit(self, optimizer, batchSize, learningRate, activation, trainx, train_y, decay, epochs, lossFunc, testx = None, test_y = None):
        ''' the fit method basically trains the model for the given configuration

            args -> optimizer :: "momentum", "nag", "rmsprop", "adam" or "nadam"; anything
                    else selects Algorithms.miniBatchGD
            args -> batchSize, learningRate, decay, epochs, lossFunc :: passed through to the optimizer
            args -> activation :: "relu", "tanh" or "identity"; anything else selects sigmoid
            args -> trainx, train_y :: images and labels; shuffled, then split 90/10 into
                    training and validation parts
            args -> testx, test_y :: optional evaluation set for the accuracy printed after
                    training. When omitted the held-out validation part is used, so the
                    method no longer depends on notebook globals of the same names.

            return -> weights, biases :: the trained model
        '''
        trainx = np.asarray(trainx)
        train_y = np.asarray(train_y)

        #break data into training and validation
        indices = np.arange(len(trainx))
        np.random.shuffle(indices)
        trainx = trainx[indices]
        train_y = train_y[indices]

        split = int(0.9*len(trainx))
        valTest_x, valTest_y = trainx[split:], train_y[split:]
        trainx, train_y = trainx[:split], train_y[:split]

        # select the activation function and its derivative; the output function is always softmax
        activations = {
            "relu": (Functions.reLU, Functions.derivative_reLU),
            "tanh": (Functions.tanh, Functions.derivative_tanh),
            "identity": (Functions.identity, Functions.derivative_identity),
        }
        activate, derivative = activations.get(activation, (Functions.sigmoid, Functions.derivative_sigmoid))
        output = Functions.softmax
        self._activate, self._output = activate, output

        # select the optimizer; the attribute is resolved only for the one requested
        # NOTE(review): confirm these optimizer methods exist on Algorithms.
        optimizers = {
            "momentum": "miniBatchMGD",
            "nag": "miniBatchNAG",
            "rmsprop": "RMSProp",
            "adam": "ADAM",
            "nadam": "NADAM",
        }
        train = getattr(Algorithms, optimizers.get(optimizer, "miniBatchGD"))
        self.weights, self.biases = train(self.weights,self.biases , batchSize, learningRate, activate, output, derivative,  trainx, train_y, valTest_x, valTest_y, decay, epochs, lossFunc)

        if testx is None or test_y is None:
            testx, test_y = valTest_x, valTest_y
        print(Algorithms.evaluateNetwork(self.weights, self.biases, activate, output, testx, test_y))

        return self.weights,self.biases

    def evaluateNetwork(self, testx, tes_ty, activate = None, output = None):
        ''' To evaluate Network on the given images and labels set.

            args -> testx :: the images to classify
            args -> tes_ty :: the expected labels (parameter name kept for compatibility)
            args -> activate, output :: optional activation / output functions. Defaults
                    are the ones chosen by the last fit() call, or sigmoid / softmax
                    when fit() has not been called.

            return -> double :: the accuracy reported by Algorithms.evaluateNetwork
        '''
        if activate is None:
            activate = self._activate if self._activate is not None else Functions.sigmoid
        if output is None:
            output = self._output if self._output is not None else Functions.softmax
        return Algorithms.evaluateNetwork(self.weights, self.biases, activate, output, testx, tes_ty)
        

In [None]:
if __name__ == '__main__':
    # Load the dataset, then centre, flatten and rescale the pixel values by 1/255.
    data = PreProc()
    train_x, test_x = data.flattenAndCentralize()
    trainx, testx = train_x/255.0, test_x/255.0
    train_y, test_y = data.getLabels()

    # Build the network: input width taken from the first flattened image, 10 outputs.
    neuralNet = FFNet(0, len(trainx[0]), 10)

    # Five hidden layers of 128 neurons each, followed by the 10-neuron output layer.
    for layer_index in range(5):
        neuralNet.addHiddenLayer(128, "xavier")
    neuralNet.addOutputLayer(10, "xavier")

    # Pack the layers into numpy arrays and fetch them for the forward passes below.
    neuralNet.solidify()
    weights, biases = neuralNet.getNetwork()

    # Forward-propagate every training image through the network (fit() is never
    # called here, so the weights are still the initial ones) and keep the class with
    # the highest output.
    prediction = []
    for image in trainx:
        a, h = Algorithms.ForwardProp(weights, biases, Functions.sigmoid, Functions.softmax, image)
        prediction.append(np.argmax(h[-1]))
    prediction = np.array(prediction)

    # Show the predicted classes, then the accuracy on the test split.
    print(prediction)
    print(Algorithms.evaluateNetwork(weights, biases, Functions.sigmoid, Functions.softmax, testx, test_y))