In [125]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [126]:
def tanh(x):
    '''
    Function that returns the tanh of x

    Input: 
    x: A scalar or numpy array of any size.

    Output:
    tanh(x)
    '''
    return np.tanh(x)

def tanh_derivative(x):
    '''
    Function that returns the derivative of tanh(x)

    Input:
    x: A scalar or numpy array of any size.

    Output:
    tanh_derivative(x)
    '''
    return 1-np.tanh(x)**2

def ReLU(x):
    '''
    Function that returns the ReLU of x

    Input:
    x: A scalar or numpy array of any size.

    Output:
    ReLU(x)
    '''
    return np.maximum(0,x)

def ReLU_derivative(x):
    '''
    Function that returns the derivative of ReLU(x)

    Input:
    x: A scalar or numpy array of any size.

    Output:
    ReLU_derivative(x)
    '''
    return np.where(x>0,1,0)

def sigmoid(x):
    '''
    Function that returns the sigmoid of x

    Input:
    x: A scalar or numpy array of any size.

    Output:
    sigmoid(x)
    '''
    return 1/(1+np.exp(-x))

def sigmoid_derivative(x):
    '''
    Function that returns the derivative of sigmoid(x)

    Input:
    x: A scalar or numpy array of any size.

    Output:
    sigmoid_derivative(x)
    '''
    Sig_x = sigmoid(x)
    return Sig_x*(1-Sig_x)

def LeakyReLU(x):
    '''
    Function that returns the LeakyReLU of x

    Input:
    x: A scalar or numpy array of any size.

    Output:
    LeakyReLU(x)
    '''
    return np.where(x>0,x,0.01*x)

def LeakyReLU_derivative(x):
    '''
    Function that returns the derivative of LeakyReLU(x)

    Input:
    x: A scalar or numpy array of any size.

    Output:
    LeakyReLU_derivative(x)
    '''
    return np.where(x>0,1,0.01)

def Convert_Output_To_Probability_Distribution(x):
    '''
    Function that returns the probability distribution of the output

    Input:
    x: A numpy array of any size.

    Output:
    exps: Probability Distribution of the output
    '''
    exps = np.exp(np.float128(x))
    return exps/np.sum(exps,dtype=np.float128)

def SoftMax(ytrue,pred):
    '''
    Function that returns the loss of the output

    Input:
    ytrue: A numpy array of actual labels
    pred: A numpy array of predictions

    Output:
    output: Loss of the output
    '''
    Actual = pred[ytrue[0][0]]
    Numerator = np.exp(Actual)
    Denominator = np.sum(np.exp(np.float128(pred)),dtype=np.float128)
    output = (-1 * np.log(Numerator / Denominator))
    return output

def SoftMax_derivative(ytrue,pred):
    '''
    Function that returns the derivative of the loss of the output

    Input:
    ytrue: A numpy array of actual labels
    pred: A numpy array of predictions

    Output:
    Gradients: Derivative of the loss of the output
    '''
    pred = Convert_Output_To_Probability_Distribution(pred)
    Gradients = []
    for idx,output in enumerate(pred):
        if idx == ytrue[0][0]:
            Gradients.append(output - 1)
        else:
            Gradients.append(output)
    return [Gradients]

def SVM_Loss(ytrue,pred):
    '''
    Function that returns the loss of the output

    Input:
    ytrue: A numpy array of actual labels
    pred: A numpy array of predictions

    Output:
    Res: Loss of the output
    '''
    Actual = pred[ytrue[0][0]]
    Res = 0.0
    for idx,output in enumerate(pred):
        if idx != ytrue[0][0]:
            RightSide = np.float32(output - Actual + 1)
            Res += np.max((0,RightSide))
    return Res

def SVM_Loss_derivative(ytrue,pred):
    '''
    Function that returns the derivative of the loss of the output

    Input:
    ytrue: A numpy array of actual labels
    pred: A numpy array of predictions

    Output:
    Gradients: Derivative of the loss of the output
    '''
    Gradients = []
    Actual = pred[ytrue[0][0]]
    for idx,output in enumerate(pred):
        if idx == ytrue[0][0]:
            Gradients.append(0)
        else:
            if output - Actual + 1 > 0:
                Gradients.append(1)
            else:
                Gradients.append(0)
    return [Gradients]

class Fully_Connected_Layer():
    def __init__(self, input_size, output_size):
        '''
        Constructor for the Fully Connected Layer that takes input size and output size as input of the layer

        Input:
        input_size: Size of the input
        output_size: Size of the output

        Output:
        None
        '''
        self.weights = np.random.uniform(-1,1,(input_size, output_size)) #Initialize weights with random values in range -1 to 1
        self.bias = np.zeros((1, output_size)) #Initialize bias with 0
                             
    def Forward_Propagation(self, input_data):
        '''
        Function that returns the output of the layer

        Input:
        input_data: A numpy array of any size.

        Output:
        self.output: Output of the layer
        '''
        self.input = input_data #Store the input for backpropagation
        self.output = np.dot(self.input, self.weights) + self.bias #Calculate the output of the respective layer
        return self.output #Return the output of the layer

    def Backward_Propagation(self, output_error, lr):
        '''
        Function that performs backpropagation and returns the error of the layer

        Input:
        output_error: A numpy array of any size.

        Output:
        input_error: Backpropogation result of the layer
        '''
        input_error = np.dot(output_error, self.weights.T) #Dot product with weights because equation in forward propagation is WX + b i.e. derivative of WX w.r.t X is W
        weights_error = np.dot(self.input.T, output_error) #Dot product with input because equation in forward propagation is WX + b i.e. derivative of WX w.r.t W is X
        dBias = output_error #Since biases have local derivative 1 so, it will be same as the output error as it is added to the output of the layer in forward propagation

        #Updating the weights and biases of the layer
        self.weights -= (lr * weights_error)
        self.bias -= (lr * np.array(dBias))
        return input_error
    
class Activation_Layer():
    '''
    Class that defines the activation layer
    '''
    def __init__(self, activation, activation_derivative):
        '''
        Constructor for the activation layer that takes activation function and its derivative as input

        Input:
        activation: Activation function
        activation_derivative: Derivative of the activation function

        Output:
        None
        '''
        self.activation = activation
        self.activation_derivative = activation_derivative

    def Forward_Propagation(self, input_data):
        '''
        Function that returns the output of the layer

        Input:
        input_data: A numpy array of any size.

        Output:
        self.output: Output of the layer after applying activation function
        '''
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def Backward_Propagation(self, output_error, learning_rate): #Learning rate is not meant to be used here but it is here to avoid repeating statements for different layers
        '''
        Function that performs backpropagation and returns the error of the layer

        Input:
        output_error: A numpy array of any size.

        Output:
        input_error: Backpropogation result of the layer
        '''
        return self.activation_derivative(self.input) * output_error

class Neural_Network:
    '''
    Class that defines the neural Neural_Network architecture
    '''
    def __init__(self,LossFunction, LossFunction_Derivative):
        '''
        Constructor for the neural Neural_Network that takes no input

        Input:
        LossFunction: Loss function to be used
        LossFunction_Derivative: Derivative of the loss function to be used

        Output:
        None
        '''
        self.layers = [] #List to store the layers of the Neural_Network
        self.loss = LossFunction #Loss function
        self.loss_prime = LossFunction_Derivative #Derivative of the loss function

    def insert_layer(self, layer):
        '''
        Function that adds the layer to the Neural_Network

        Input:
        layer: Layer to be added to the Neural_Network

        Output:
        None
        '''
        self.layers.append(layer) #Add the layer to the list of layers
    
    def predict(self, input_data):
        '''
        Function that returns the output of the Neural_Network after predictions

        Input:
        input_data: A numpy array of any size.

        Output:
        result: Output of the Neural_Network after predictions
        '''
        samples = len(input_data) #Get the number of test examples
        result = [] #List to store the output of the Neural_Network

        #Iterating for each test example
        for i in range(samples):
            output = input_data[i][0]
            for layer in self.layers: #Pass the test example through each layer of the Neural_Network
                output = layer.Forward_Propagation(output)
            result.append(output) #Append the output of the output layer of the Neural_Network to the list
        return result #Return the list of outputs

    def fit(self, x_train, y_train, n_Epochs, learning_rate=0.01):
        '''
        Function to train the Neural_Network

        Input:
        x_train: Training data
        y_train: Training labels
        epochs: Number of epochs to train the Neural_Network
        learning_rate: Learning rate of the Neural_Network

        Output:
        None
        '''
        NumExamples = len(x_train)
        
        #Training loop
        for i in range(n_Epochs):
            RunningLoss = 0.0 #Running Error for each epoch
            for j in range(NumExamples):
                #Forward Propagation
                Result = x_train[j]
                for layer in self.layers:
                    Result = layer.Forward_Propagation(Result) #Pass the training example through each layer of the Neural_Network

                #Compute loss (for display purpose only)
                TempLoss = self.loss(y_train[j], Result[0])
                RunningLoss += TempLoss

                if self.loss == SVM_Loss and TempLoss == 0.0:
                    '''
                    If the loss is SVM_Loss and the loss is 0.0 then there is no need to perform backpropagation to update the weights and biases
                    '''
                    continue
                
                else:
                    #Backward propagation
                    error = self.loss_prime(y_train[j], Result[0])
                    for layer in reversed(self.layers):
                        error = layer.Backward_Propagation(error, learning_rate) #Pass the error through each layer of the Neural_Network in reverse order
                    
            #Calculate average error on all samples
            RunningLoss /= NumExamples #Average error
            print('Epoch:', i+1, 'Loss:', RunningLoss) #Printing the epoch number and the error

In [127]:
#Importing the required libraries
X,Y = load_digits(return_X_y=True)

#Normalizing the data
X = (X - np.min(X)) / (np.max(X) - np.min(X))

DF = pd.DataFrame(X)
DF['target'] = Y
DF.to_csv('data.csv')
DF.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,target
0,0.0,0.0,0.3125,0.8125,0.5625,0.0625,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.375,0.8125,0.625,0.0,0.0,0.0,0
1,0.0,0.0,0.0,0.75,0.8125,0.3125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.6875,1.0,0.625,0.0,0.0,1
2,0.0,0.0,0.0,0.25,0.9375,0.75,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.1875,0.6875,1.0,0.5625,0.0,2
3,0.0,0.0,0.4375,0.9375,0.8125,0.0625,0.0,0.0,0.0,0.5,...,0.0,0.0,0.0,0.4375,0.8125,0.8125,0.5625,0.0,0.0,3
4,0.0,0.0,0.0,0.0625,0.6875,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.125,1.0,0.25,0.0,0.0,4


In [128]:
# Splitting the data into training and testing data
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.2, stratify=Y,random_state=42,shuffle=True)

#Reshaping the data
x_train = x_train.reshape((np.shape(x_train)[0],1,np.shape(x_train)[1]))
x_test = x_test.reshape((np.shape(x_test)[0],1,np.shape(x_test)[1]))
y_train = y_train.reshape((np.shape(y_train)[0],1,1))
y_test = y_test.reshape((np.shape(y_test)[0],1,1))
print(np.shape(x_train),np.shape(x_test),np.shape(y_train),np.shape(y_test)) #Printing the shape of the data

(1437, 1, 64) (360, 1, 64) (1437, 1, 1) (360, 1, 1)


In [129]:
def Get_Accuracy(Actual,Pred):
    '''
    Function to calculate the accuracy of the model
    '''
    count = 0
    Total = 0
    for idx,actual in enumerate(Actual):
        if actual[0][0] == np.argmax(Pred[idx][0]):
            count += 1
        Total += 1
    return (count/Total)*100 #Returning the accuracy

In [130]:
Loss_Method_Accuracy = {}
for idxLoss,(Loss,LossDer) in enumerate(zip([SoftMax,SVM_Loss],[SoftMax_derivative,SVM_Loss_derivative])):
    print('Loss being used:',Loss.__name__)

    MNIST_NN = Neural_Network(Loss,LossDer)
    MNIST_NN.insert_layer(Fully_Connected_Layer(64, 128))                
    MNIST_NN.insert_layer(Activation_Layer(sigmoid, sigmoid_derivative))
    MNIST_NN.insert_layer(Fully_Connected_Layer(128, 128))
    MNIST_NN.insert_layer(Activation_Layer(sigmoid, sigmoid_derivative))
    MNIST_NN.insert_layer(Fully_Connected_Layer(128, 128))
    MNIST_NN.insert_layer(Activation_Layer(sigmoid, sigmoid_derivative))
    MNIST_NN.insert_layer(Fully_Connected_Layer(128, 128))
    MNIST_NN.insert_layer(Activation_Layer(sigmoid, sigmoid_derivative))
    MNIST_NN.insert_layer(Fully_Connected_Layer(128, 10))

    MNIST_NN.fit(x_train, y_train, n_Epochs=15, learning_rate=0.005)

    preds = MNIST_NN.predict(x_test)

    Accuracy = Get_Accuracy(y_test,preds)
    Loss_Method_Accuracy[Loss.__name__] = Accuracy
    print('Accuracy:',Accuracy)

print('The better performing loss function is:',max(Loss_Method_Accuracy, key=Loss_Method_Accuracy.get))

Loss being used: SoftMax
Epoch: 1 Loss: 1.5771268453136747495
Epoch: 2 Loss: 0.71707562545964573686
Epoch: 3 Loss: 0.46468617766372714674
Epoch: 4 Loss: 0.34864079316294555535
Epoch: 5 Loss: 0.2800157578257109242
Epoch: 6 Loss: 0.23406156913045814598
Epoch: 7 Loss: 0.20089067172392036463
Epoch: 8 Loss: 0.17570075827173978088
Epoch: 9 Loss: 0.15582061618959241499
Epoch: 10 Loss: 0.13964699991594215164
Epoch: 11 Loss: 0.12616210259579738324
Epoch: 12 Loss: 0.11469128894484502669
Epoch: 13 Loss: 0.104776943103330936826
Epoch: 14 Loss: 0.09610440157645325042
Epoch: 15 Loss: 0.0884531116789761604
Accuracy: 95.55555555555556
Loss being used: SVM_Loss
Epoch: 1 Loss: 9.524972621172605
Epoch: 2 Loss: 9.199299868638214
Epoch: 3 Loss: 9.221789978790278
Epoch: 4 Loss: 9.205218124052012
Epoch: 5 Loss: 9.186366232469398
Epoch: 6 Loss: 9.206279598338343
Epoch: 7 Loss: 9.245864870902942
Epoch: 8 Loss: 9.245948348685298
Epoch: 9 Loss: 9.245996098137132
Epoch: 10 Loss: 9.246041758218132
Epoch: 11 Loss: 