In [107]:
import numpy as np
import pandas as pd

In [108]:
def ReLU(x):
    return np.maximum(0, x)

In [109]:
def softmax(x):
    exponents = np.exp(x)
    return exponents / np.sum(exponents)

In [110]:
def forward_pass(X, W, b, activation = None):
    """
    Args:
        X (np.array, 1 dimensional vector) : Input Vector, outptut of the previous NN layer
        W (np.array, 2 dimensional vector) : Weights of the layer
        b (np.float32) : Bias value of the layer
        activation (string) : The activation function of the layer, applied to the output
    Returns:
        W (np.array, 1 dimenstional vector) : The outputs of each neuron in the layer
    """

    input_shape = X.shape[0]
    output_shape = W.shape[0]

    W = np.matmul(W.T, X) + b

    if activation == 'ReLU':
        W = ReLU(W)
    elif activation == 'softmax':
        W = softmax(W)


    return W

In [111]:
# Testing the forward pass function

W1 = np.array([[1,2], [3,4]]).astype(np.float32)
X1 = np.array([-1.8,3]).astype(np.float32)
b1 = 0

prediction = forward_pass(X=X1,
             W=W1,
             b=b1,
             activation='softmax')
prediction

array([0.23147525, 0.76852477], dtype=float32)

In [112]:
class SparseCategoricalCrossEntropy():
    @staticmethod
    def loss(y, y_pred):
        """
        Calculates and returns the loss of the prediction
        Args:
            y (int) : The label of the output (range: 0 to n-1)
            y_pred (np.array, shape=(1,n)) : The output probabilities

        Returns:
            loss (np.float) : The categorical cross entropy loss wrt to given predictions
        """
        return np.log(y_pred[y])
    
    @staticmethod
    def gradient(y, y_pred):
        """
        Returns the gradients of the loss wrt to the output probabilities
        Args:
            y (int) : The label of the output (range: 0 to n-1)
            y_pred (np.array, shape=(n,)) : The output probabilities

        Returns:
            dL_dpred (array, shape=(n,)) : The gradients of loss wrt to the probabilities
        """

        gradient = np.zeros(y_pred.shape)
        gradient[y] = -1 / y_pred[y]

        return gradient


In [113]:
# Testing the SparseCategoricalCrossEntropy Function
Y_true = 1
loss = SparseCategoricalCrossEntropy.loss(y=Y_true,
                                 y_pred=prediction)
loss

-0.26328245

In [114]:
def backward_pass(X, W, b, output, activation, da):
    if activation == None:
        dz = da
    elif activation == 'softmax':
        dz = output * da
    elif activation == 'ReLU':
        dz = np.greater(output, 0) * da
    else:
        raise Exception("Activation funciton not found. Check the activation is either None, softmax or ReLU")
    
    dw = np.matmul(X.reshape(-1,1), dz.reshape(1,-1))
    return dz, dw

In [115]:
# Testing the backward pass function
da = [0, 1]
backward_pass(X=X1,
              W=W1,
              b=b1,
              output=prediction,
              activation='ReLU',
              da=da)

(array([0, 1]),
 array([[ 0.        , -1.79999995],
        [ 0.        ,  3.        ]]))

In [116]:
initial_weights = pd.read_csv('Task_1/b/w-100-40-4.csv', header=None)
# initial_weights = pd.read_csv('Task_1/a/w.csv', header=None)

In [117]:
initial_biases =  pd.read_csv('Task_1/b/b-100-40-4.csv', header=None)
# initial_biases = pd.read_csv('Task_1/a/b.csv', header=None)

In [118]:
X = np.array([-1, 1, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1]) # As given in data_point.txt

In [119]:
w1 = initial_weights.iloc[0:14, 1:].to_numpy().astype(np.float32)
w2 = initial_weights.iloc[14:114, 1:41].to_numpy().astype(np.float32)
w3 = initial_weights.iloc[114:, 1:5].to_numpy().astype(np.float32)

In [120]:
b1 = initial_biases.iloc[0, 1:].to_numpy().astype(np.float32)
b2 = initial_biases.iloc[1, 1:41].to_numpy().astype(np.float32)
b3 = initial_biases.iloc[2, 1:5].to_numpy().astype(np.float32)

In [121]:
# Label given for datapoint in data_point.txt
y = 3

In [122]:
class NNLayer():
    def __init__(self, input_shape, output_shape, activation=None, W=None, b=None, dtype=np.float64):
        self.input_shape = input_shape
        self.output_shape = output_shape
        self.X = np.zeros(input_shape)
        self.W = np.random.randn(input_shape, output_shape) if W is None else W
        self.b = np.random.randn(output_shape, 1) if b is None else b
        self.activation = activation # One of None, 'ReLU' or 'softmax'
        self.dW = np.zeros(self.W.shape)
        self.db = np.zeros(self.b.shape)
        self.dz = np.zeros(self.output_shape)
        self.a = 0

        self.dtype = dtype


    def display(self):
         print(f"This layer has an input shape of {self.input_shape}")
         print(f"This layer has an output shape of {self.output_shape}")
         print("\nThe Weights are, ")
         print(self.W)
         print("\nThe biases are: ")
         print(self.b)
         print("\nThe gradients of W are: ")
         print(self.dW)
         print("\nThe gradients of b are: ")
         print(self.db)


    def forward(self, X):
        """
        Args:
            X (np.array, 1 dimensional vector) : Input Vector, outptut of the previous NN layer

        Returns:
            W (np.array, 1 dimenstional vector) : The outputs of each neuron in the layer
        """

        # Save the input, will be used during backpropogation
        self.X = X

        # Multiply the inputs by weights and add the bias
        z = np.matmul(self.W.T, X) + self.b

        # Apply the activation function
        if self.activation == 'ReLU':
            self.a = ReLU(z)
        elif self.activation == 'softmax':
            self.a = softmax(z)
        else:
            self.a = z
            
        return self.a
    
    def backward(self, gradient):
        """
        Args:
            gradient (np.array, 1 dimensional vector) : The gradient of loss wrt to the output of the layer dL/da 
        
        Returns:
            dL/dX (np.array, shape=(input_shape,)) : The gradient of loss wrt to the input of the layer
        """

        ## 1. First compute the gradient of loss wrt to z
        if self.activation == None:
            dz = gradient.astype(np.float32)
        elif self.activation == 'softmax':
            # Compute the Jacobian, da/dz
            J_da_dz = np.zeros((self.output_shape, self.output_shape))
            for index, _ in np.ndenumerate(J_da_dz):
                # J[i,j] = a_i * (I(i==j) - a_j)
                i, j = index[0], index[1]
                J_da_dz[index] = self.a[i] * ( (i==j) - self.a[j] )
            dz = np.matmul(J_da_dz, gradient.reshape(-1,1)).astype(self.dtype)
        elif self.activation == 'ReLU':
            dz = (np.greater(self.a, 0) * gradient).astype(self.dtype)
        else:
            raise Exception("Activation funciton not found. Check the activation is either None, softmax or ReLU")


        ## 2. Use dL/dz to compute dw, db, dX
        self.dW = np.matmul(self.X.reshape(-1,1), dz.reshape(1,-1)).astype(self.dtype)
        self.db = dz.reshape(-1,1).astype(self.dtype)
        dX = np.matmul(dz.reshape(1,-1), self.W.T).squeeze().astype(self.dtype)


        return dX

In [123]:
NUM_LAYERS = 4 # Inclusive of input and output layers
LAYER_SIZES = [14, 100, 40, 4]

layer1 = NNLayer(LAYER_SIZES[0], LAYER_SIZES[1], activation='ReLU', W=w1, b=b1, dtype=np.float32)
layer2 = NNLayer(LAYER_SIZES[1], LAYER_SIZES[2], activation='ReLU', W=w2, b=b2, dtype=np.float32)
layer3 = NNLayer(LAYER_SIZES[2], LAYER_SIZES[3], activation='softmax', W=w3, b=b3, dtype=np.float32)

In [124]:
a1 = layer1.forward(X)
a2 = layer2.forward(a1)
a3 = layer3.forward(a2)
a3

array([1.88258338e-16, 1.08619921e-68, 1.47340802e-78, 1.00000000e+00])

In [125]:
gradient3 = SparseCategoricalCrossEntropy.gradient(y=y, y_pred=layer3.a)
gradient2 = layer3.backward(gradient3)
gradient1 = layer2.backward(gradient2)
gradient0 = layer1.backward(gradient1)

In [126]:
layer3.db

array([[ 1.8825834e-16],
       [ 0.0000000e+00],
       [ 0.0000000e+00],
       [-1.1102230e-16]], dtype=float32)

In [127]:
with open('Task_1/dw_new.csv', 'w') as f:
    np.savetxt(f, layer1.dW, delimiter=',')
    np.savetxt(f, layer2.dW, delimiter=',')
    np.savetxt(f, layer3.dW, delimiter=',')

In [128]:
with open('Task_1/db_new.csv', 'w') as f:
    np.savetxt(f, layer1.db.T, delimiter=',')
    np.savetxt(f, layer2.db.T, delimiter=',')
    np.savetxt(f, layer3.db.T, delimiter=',')

In [129]:
# read these 2 csvs
import pandas as pd
dw = pd.read_csv('Task_1/dw_new.csv', header=None)
db = pd.read_csv('Task_1/db_new.csv', header=None)

dw.shape, db.shape


((154, 100), (3, 100))

In [130]:
def train(self, X_train, y_train, X_test=None, y_test=None, epochs=100, learning_rate=0.1):
        training_losses = []
        training_accuracy = []
        validation_losses = []
        validation_accuracy = []
        #### Training Loop
        for epoch in range(epochs):
            print(f"Training ... Epoch: {epoch} | ", end=" ")

            ### Forward pass
            a = X_train.copy()
            for layer in self.layers:
                a = layer.forward(a)

            ### Calculate the loss
            loss, gradient = SparseCategoricalCrossEntropy.loss(y=y_train, y_pred=a, logits=True)
            training_losses.append(loss)

            accuracy = np.mean(np.argmax(self.predict(X_train, apply_softmax=True), axis=0) == y_train)
            training_accuracy.append(accuracy)

            print("Loss: ", loss, " | Accuracy: ", accuracy)

            ### Backward pass
            for layer in reversed(self.layers):
                gradient = layer.backward(gradient)
                layer.update_parameters(training_samples=X_train.shape[1], learning_rate=learning_rate)


            ### Validation
            if X_test is not None and y_test is not None:
                ### Forward pass
                a_test = X_test
                for layer in self.layers:
                    a_test = layer.forward(a_test, inference=True)

                loss_test, _ = SparseCategoricalCrossEntropy.loss(y=y_test, y_pred=a_test, logits=True)
                validation_losses.append(loss_test)

                accuracy= np.mean(np.argmax(self.predict(X_test, apply_softmax=True), axis=0) == y_test)
                validation_accuracy.append(accuracy)


        return training_losses, validation_losses, training_accuracy, validation_accuracy