In [130]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import warnings
warnings.filterwarnings("ignore")
from sklearn.preprocessing import normalize, MinMaxScaler

np.random.seed(132)

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
labels = pd.read_csv('/content/drive/MyDrive/Colab projects/Neural_Network_From_Scratch/data/Labels.csv')
data = pd.read_csv('/content/drive/MyDrive/Colab projects/Neural_Network_From_Scratch/data/Data.csv')

In [4]:
labels = labels.drop(columns=['Unnamed: 0'])
data = data.drop(columns=['Unnamed: 0'])

In [5]:
X_train = data.to_numpy()
X_train.shape

(13611, 16)

In [6]:
enc = OneHotEncoder()
ohe_labels = enc.fit_transform(labels)

In [7]:
y_train = ohe_labels.toarray()
y_train.shape

(13611, 7)

In [16]:
y_train[:5]

array([[0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1., 0.]])

In [8]:
X_train[:5]

array([[2.83950000e+04, 6.10291000e+02, 2.08178117e+02, 1.73888747e+02,
        1.19719142e+00, 5.49812187e-01, 2.87150000e+04, 1.90141097e+02,
        7.63922518e-01, 9.88855999e-01, 9.58027126e-01, 9.13357755e-01,
        7.33150600e-03, 3.14728900e-03, 8.34222388e-01, 9.98723889e-01],
       [2.87340000e+04, 6.38018000e+02, 2.00524796e+02, 1.82734419e+02,
        1.09735646e+00, 4.11785251e-01, 2.91720000e+04, 1.91272751e+02,
        7.83968133e-01, 9.84985603e-01, 8.87033637e-01, 9.53860842e-01,
        6.97865900e-03, 3.56362400e-03, 9.09850506e-01, 9.98430331e-01],
       [2.93800000e+04, 6.24110000e+02, 2.12826130e+02, 1.75931143e+02,
        1.20971266e+00, 5.62727317e-01, 2.96900000e+04, 1.93410904e+02,
        7.78113248e-01, 9.89558774e-01, 9.47849473e-01, 9.08774239e-01,
        7.24391200e-03, 3.04773300e-03, 8.25870617e-01, 9.99066137e-01],
       [3.00080000e+04, 6.45884000e+02, 2.10557999e+02, 1.82516516e+02,
        1.15363806e+00, 4.98615976e-01, 3.07240000e+04, 1.954

### Normalizing values since they are very large



In [139]:
X_train_norm = normalize(X_train)

In [140]:
mms = MinMaxScaler()
X_train_mms = mms.fit_transform(X_train)

In [70]:
class ActivationFunction():
    def __init__(self):
        self.name = self.__class__.__name__

class Sigmoid(ActivationFunction):
    def __init__(self):
        super().__init__()

    def forward(self, X):
      return 1.0/(1.0+np.exp(-X))

    def backward(self, X):
      val = self.forward(X)
      return val*(1-val)

class Softmax(ActivationFunction):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        #print(f'input : {x}')
        #input is a matrix. Return element wise softmax
        #f = np.exp(x - np.max(x))  # shift values
        #print(f'output : {np.exp(x)/(np.sum(np.exp(x)))}')
        #return f / f.sum(axis=0)
        return np.exp(x)/(np.sum(np.exp(x)))

    def backward(self, x):
        softmax = self.forward(x)
        return softmax*(1-softmax)

class ReLU(ActivationFunction):
    def __init__(self):
        super().__init__()

    def forward(self,X):
        return X * (X > 0)

    def backward(self,X):
        X[X <= 0.0] = 0.0
        X[X > 0.0] = 1.0
        return X

In [78]:
class LossFunction():

    def __init__(self):
        self.name = self.__class__.__name__

class CrossEntropyLoss(LossFunction):

    def __init__(self):
        super().__init__()

    def computeLoss(self, Y_true, Y_pred):
      for p in Y_pred[0]:
        if np.isnan(p) or p<10e-8:
          p=10e-8
      loss=np.multiply(Y_pred,Y_true)
      loss=loss[loss!=0]
      loss=-np.log(loss)
      loss=np.mean(loss)

      return loss

    def backward(self, Y_pred,Y_true):
      return -Y_true/(Y_pred)

    def last_output_derivative(self, Y_pred,Y_true):
      for p in Y_pred[0]:
        if np.isnan(p) or p<10e-8:
          p=10e-8
      return -(Y_true - Y_pred)





In [123]:
def computeAccuracy(y_true, y_pred):
  true_count = 0
  total = len(y_pred)
  for i in range(len(y_pred)):
    #print(f'True {np.argmax(y_true)} pred {np.argmax(y_pred)}')
    if (np.argmax(y_true[i]) == np.argmax(y_pred[i])):
      true_count += 1

  return true_count/total

In [148]:
class NeuralNetworkClassifier():

    def __init__(self, layer_weights=[32,64], lr = 0.000005, activation_fn = ReLU(), output_fn = Softmax(), loss_fn=CrossEntropyLoss()):
        self.layers = layer_weights
        self.eta = lr
        self.output_layer = None
        self.input_layer = None
        self.loss_fn = loss_fn
        self.output_fn = output_fn
        self.activation_fn = activation_fn

        #Weights and biases are to be stored as KV pairs
        self.W = {}
        self.B = {}

        #Intermediate computations to be stored as KV pairs
        self.A = {}
        self.H = {}

        #Gradients to be stored for weights, biases and intermediate layers
        self.dW = {}
        self.dB = {}
        self.dH = {}
        self.dA = {}



    def fit(self, X, Y, epochs=10):
        '''
        Mandatory call for fitting training data on the model.
        Required step since input and output layer size are unknown.
        '''

        self.input_layer = X.shape[1] #num features
        self.output_layer = Y.shape[1] #num classes

        self.layers = [self.input_layer] + self.layers + [self.output_layer]

        print(f"Neural network layer sizes : {self.layers}")

        #initialize weights for the layers
        self.initialize_weights()



        for epoch in range(epochs):
            self.dW = {}
            self.dB = {}
            self.dH = {}
            self.dA = {}


            #Call forward propagation to train model
            preds = []
            iteration = 0
            for x,y in zip(X,Y):
                self.forward_propagation(x)
                preds.append(self.H[(len(self.layers)-1)])

                if iteration==0:
                    for layer_num in range(1,len(self.layers)):
                        self.dW[layer_num] = np.zeros((self.layers[layer_num-1], self.layers[layer_num]))

                        self.dB[layer_num] = np.zeros((1,self.layers[layer_num]))

                        self.dH[layer_num] = np.zeros((self.H[layer_num].shape[0], self.H[layer_num].shape[1]))

                        self.dA[layer_num] = np.zeros((self.A[layer_num].shape[0], self.A[layer_num].shape[1]))
                iteration +=1
                #Perform backpropagation over losses
                self.back_propagation(np.array(y), np.array(self.H[(len(self.layers)-1)]).squeeze())

            preds = np.array(preds).squeeze()
            #Compute loss here to check performance
            epoch_loss = self.loss_fn.computeLoss(np.array(Y), np.array(preds))
            accuracy = computeAccuracy(Y, preds)

            print(f"Epoch {epoch} :: Loss {epoch_loss} :: Accuracy {accuracy}")
            self.update_weights()

        return preds


    def initialize_weights(self):
        '''
        Initialize the weights and biases once the layer sizes are known
        '''
        for layer_num in range(1,len(self.layers)):
            self.W[layer_num] = np.random.randn(self.layers[layer_num-1], self.layers[layer_num])
            print(f"Shape of W{layer_num} = {self.W[layer_num].shape }")
            self.B[layer_num] = np.random.randn(self.layers[layer_num])
            print(f"Shape of B{layer_num} = {self.B[layer_num].shape }")



    def forward_propagation(self, X):

        #x is a single datapoint (num_feats, )

        self.H = {}
        self.A = {}
        #Check if num features are accurate
        if X.shape[0] != self.input_layer:
            print(f"Invalid shape. {X.shape[1]} does not match {self.input_layer}")
            return

        #reshaped to have 1 row and all elements as column values (hence -1)
        self.H[0] = X.reshape(1,-1).astype(float)

        #Repeat from layer 1 till layer l-1 (for final layer the output activation will change, so that will be done separately)
        #For [i/p, l1, l2, o/p] sequence of layers, this loop runs for l1 and l2
        for layer in range(1, len(self.layers)-1):

            self.A[layer] = np.matmul(self.H[(layer-1)],self.W[layer]) + self.B[(layer)]
            self.H[layer] = self.activation_fn.forward(self.A[layer]) #compute sigmoid or orher activation here

        self.A[(len(self.layers)-1)] = np.matmul(self.H[(len(self.layers)-2)],self.W[(len(self.layers)-1)]) + self.B[(len(self.layers)-1)]
        self.H[(len(self.layers)-1)] = self.output_fn.forward(self.A[(len(self.layers)-1)])


        return

    def back_propagation(self, y_true, y_pred):

        dW_i = {}
        dB_i = {}
        #1. dloss/doutput (h_last_layer)

        self.dA[len(self.layers)-1] = self.loss_fn.last_output_derivative(self.H[len(self.layers)-1], y_true)

        for layer in range(len(self.layers) -2, 0, -1):

            dB_i[layer+1] = self.dA[layer+1]
            dW_i[layer+1] = np.matmul(self.H[layer].T,self.dA[layer+1]) #The dimensions of W and DW must match, hence H stays on the left

            self.dH[layer] = np.matmul(self.dA[layer+1],self.W[layer+1].T)
            self.dA[layer] = np.multiply(self.activation_fn.backward(self.A[layer]),self.dH[layer]) #Hadamard product (element wise multiplication)


        dW_i[1] = np.matmul(self.H[0].T,self.dA[1]) #The dimensions of W and DW must match, hence H stays on the left
        dB_i[1] = self.dA[1]

        for layer in range(1, len(self.layers)):
            self.dW[layer] += dW_i[layer]
            self.dB[layer] += dB_i[layer]


    def update_weights(self):

        for layer in range(1, len(self.layers)):
            self.W[layer] = self.W[layer]- self.eta*self.dW[layer]
            self.B[layer] = self.B[layer]- self.eta*self.dB[layer]



In [149]:
nn = NeuralNetworkClassifier([32, 64])
sample_preds = nn.fit(X_train_norm, y_train, epochs=20)

Neural network layer sizes : [16, 32, 64, 7]
Shape of W1 = (16, 32)
Shape of B1 = (32,)
Shape of W2 = (32, 64)
Shape of B2 = (64,)
Shape of W3 = (64, 7)
Shape of B3 = (7,)
Epoch 0 :: Loss 27.45505579455285 :: Accuracy 0.19366688707662919
Epoch 1 :: Loss 35.974693416576635 :: Accuracy 0.1416501359194769
Epoch 2 :: Loss 52.69086195539561 :: Accuracy 0.2605245757108221
Epoch 3 :: Loss 34.21109863014995 :: Accuracy 0.09712732348835501
Epoch 4 :: Loss 13.277097716851223 :: Accuracy 0.11975607964146646
Epoch 5 :: Loss 10.800464257703087 :: Accuracy 0.148923664682977
Epoch 6 :: Loss 7.90808288679784 :: Accuracy 0.2605245757108221
Epoch 7 :: Loss 5.141597560297759 :: Accuracy 0.1416501359194769
Epoch 8 :: Loss 4.087360841719096 :: Accuracy 0.148923664682977
Epoch 9 :: Loss 4.422611098562959 :: Accuracy 0.2605245757108221
Epoch 10 :: Loss 3.245622861770646 :: Accuracy 0.19366688707662919
Epoch 11 :: Loss 3.547900471644359 :: Accuracy 0.2605245757108221
Epoch 12 :: Loss 2.434496249439777 :: Accu