In [2]:
import pandas as pd
import numpy as np

## Extracting weight & bias matrices from CSV files


In [55]:
def get_matrices(file_path):
    """Load labelled matrices from a headerless CSV file.

    The first column of every CSV row is treated as a label. Rows that share
    a label are gathered into one matrix; the label cell is dropped from each
    row and missing (NaN) cells are removed, so a row keeps only the values
    that are actually present.

    Args:
        file_path: CSV source, passed straight to ``pd.read_csv``.

    Returns:
        dict mapping each label to a list of rows, where every row is a list
        of the remaining non-NaN cell values.
    """
    table = pd.read_csv(file_path, header=None)
    label_column = table.columns[0]

    matrices = {}
    for label, layer_rows in table.groupby(label_column):
        # record[0] is the label itself; everything after it is matrix data.
        matrices[label] = [
            [cell for cell in record[1:] if not pd.isna(cell)]
            for record in layer_rows.values.tolist()
        ]
    return matrices


In [56]:
# Load the weight matrices; the path is relative to the current working directory.
csv_file_path = './Task_1/a/w.csv'
weights = get_matrices(csv_file_path)
# Print every row of every matrix under a heading that names its CSV label.
for layer_pair, matrix in weights.items():
    print(f"Weights for {layer_pair}:")
    for row in matrix:
        print(row)

Weights for weights btw layer0 to layer1:
[0.4714351594448089, -1.1909756660461426, 1.4327069520950315, -0.3126519024372101, -0.720588743686676, 0.8871629238128662, 0.8595883846282959, -0.6365234851837158, 0.0156963728368282, -2.242684841156006, 1.150035738945007, 0.9919460415840148, 0.9533241391181946, -2.021254777908325, -0.3340773582458496, 0.0021183646749705, 0.4054534137248993, 0.289091944694519, 1.3211581707000732, -1.546905517578125, -0.20264632999897, -0.6559693217277527, 0.1934213787317276, 0.5534389019012451, 1.318151593208313, -0.4693052768707275, 0.675554096698761, -1.81702721118927, -0.1831085383892059, 1.0589691400527954, -0.3978402316570282, 0.3374376595020294, 1.0475785732269287, 1.04593825340271, 0.8637173175811768, -0.1220915764570236, 0.1247129514813423, -0.3227947950363159, 0.8416746854782104, 2.390960454940796, 0.0761995911598205, -0.5664459466934204, 0.0361419357359409, -2.0749776363372803, 0.2477921992540359, -0.8971567749977112, -0.1367948353290558, 0.0182891916

In [57]:
# Load the bias values with the same loader; this rebinds csv_file_path to the bias file.
csv_file_path = './Task_1/a/b.csv'
biases = get_matrices(csv_file_path)
# Print every row of every bias entry under a heading that names its CSV label.
for layer, matrix in biases.items():
    print(f"Biases for {layer}:")
    for row in matrix:
        print(row)

Biases for bias for layer1:
[-0.6351549625396729, -0.6810201406478882, -1.5015857219696045, -0.3689251244068146, -0.3529993593692779, 0.1273843944072723, 0.5175313353538513, 0.7478368282318115, -0.8093775510787964, -0.6326858997344971, -0.4466548264026642, -0.6869202256202698, 0.2093100845813751, 0.4803082346916199, 0.1385172009468078, -0.7901001572608948, -0.3858503401279449, 0.5576661825180054, 0.3433474004268646, 0.0673438683152198, -0.6158051490783691, -0.9106032252311708, 2.171043157577514, -0.0505546294152736, 1.5523521900177002, 0.7961558103561401, -0.545169472694397, -1.870703101158142, -0.5623654127120972, -1.783594846725464, 0.0706343799829483, -0.4745371043682098, 0.2523058652877807, -1.1109249591827393, -0.0898044556379318, -0.8365493416786194, -0.4144070744514465, 0.8533092141151428, 0.6142266392707825, -0.2385667562484741, -1.0552879571914673, 0.4334354698657989, 1.7641195058822632, 1.197646141052246, -0.9488682746887208, -0.468651682138443, -1.2601913213729858, 1.5802851

## Defining Activation functions & Error functions

### Rectified linear unit (ReLU)

In [58]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``.

    Args:
        x: scalar or array-like of numbers.

    Returns:
        Values of ``x`` with everything below zero replaced by 0.
    """
    zero_floor = 0
    rectified = np.maximum(zero_floor, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU evaluated element-wise at ``x``.

    Args:
        x: scalar, NumPy array, or any array-like (e.g. a nested list) of
            pre-activation values.

    Returns:
        Integer NumPy array shaped like ``x`` holding 1 where ``x > 0`` and 0
        elsewhere (the value at exactly 0 is 0).
    """
    # Coerce first so plain Python lists work, matching what relu() accepts;
    # comparing a list with 0 directly would raise TypeError.
    positive_mask = np.asarray(x) > 0
    return np.where(positive_mask, 1, 0)

### Softmax Function

In [60]:
def softmax(x):
    """Softmax taken along axis 1 of ``x``.

    Each row's maximum is subtracted before exponentiating so that large
    values do not overflow; the shift cancels out in the final ratio.

    Args:
        x: array with at least two dimensions; axis 1 is normalised.

    Returns:
        Array of the same shape whose entries along axis 1 sum to 1.
    """
    row_peak = np.max(x, axis=1, keepdims=True)
    exponentials = np.exp(x - row_peak)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals

### Cross Entropy Loss Function with Softmax

In [62]:
def cross_entropy_loss(y, y_hat, eps=1e-12):
    """Mean cross-entropy between targets ``y`` and predictions ``y_hat``.

    Args:
        y: target values (e.g. one-hot rows); array or array-like whose first
            axis indexes the samples.
        y_hat: predicted probabilities, same shape as ``y``.
        eps: lower bound applied to ``y_hat`` before taking the logarithm.

    Returns:
        ``-sum(y * log(y_hat))`` divided by the size of the first axis of ``y``.
    """
    y = np.asarray(y)
    # A predicted probability of exactly 0 would give log(0) = -inf, which
    # turns the loss into inf, or into nan when multiplied by a 0 target.
    # Flooring at eps keeps the loss finite.
    safe_y_hat = np.clip(np.asarray(y_hat), eps, None)
    return -np.sum(y * np.log(safe_y_hat)) / y.shape[0]

## Neural Network implementation with Forward and Backward propagation 

In [96]:
class NeuralNetwork:
    """Feed-forward network with ReLU hidden layers and a softmax output layer.

    Parameters live in two dicts of NumPy arrays, looked up with the keys
    'weights btw layer{i} to layer{i+1}' and 'bias for layer{i+1}', where
    layer 0 is the input.
    """

    def __init__(self, weights, biases):
        """Store the parameters, converting every dict value to a NumPy array.

        Args:
            weights: dict of weight matrices, keyed as described on the class.
            biases: dict of bias values, keyed as described on the class.
        """
        self.weights = dict((name, np.array(values)) for name, values in weights.items())
        self.biases = dict((name, np.array(values)) for name, values in biases.items())

    def _pre_activation(self, layer_input, index):
        """Return ``layer_input . W + b`` for the step from layer ``index`` to ``index + 1``."""
        weight_key = f'weights btw layer{index} to layer{index + 1}'
        bias_key = f'bias for layer{index + 1}'
        return np.dot(layer_input, self.weights[weight_key]) + self.biases[bias_key]

    def forward(self, input):
        """Run a forward pass and record every intermediate result.

        Args:
            input: network input; it is multiplied with the first weight matrix.

        Returns:
            Tuple ``(activations, before_activation)``:
            ``activations`` is a list that starts with ``input`` and then holds
            the output of each layer (ReLU for hidden layers, softmax for the
            last one); ``before_activation`` is a list of each layer's weighted
            sum plus bias, taken before the activation function.
        """
        output_index = len(self.weights) - 1
        activations = [input]
        before_activation = []

        # Hidden layers: affine step followed by ReLU.
        for index in range(output_index):
            weighted_sum = self._pre_activation(activations[-1], index)
            before_activation.append(weighted_sum)
            activations.append(relu(weighted_sum))

        # Output layer: affine step followed by softmax.
        logits = self._pre_activation(activations[-1], output_index)
        before_activation.append(logits)
        activations.append(softmax(logits))

        return activations, before_activation

    def backward(self, X, y_true, activations, z_values):
        """Backpropagate the softmax cross-entropy error through the network.

        Args:
            X: not used by this method; kept in the signature for callers.
            y_true: target values; its first axis length is the divisor used
                to average the gradients.
            activations: list returned by ``forward`` (input first, network
                output last).
            z_values: list of pre-activation values returned by ``forward``.

        Returns:
            Tuple ``(grad_weights, grad_biases)`` of dicts that use the same
            keys as ``self.weights`` and ``self.biases``.
        """
        layer_count = len(self.weights)
        batch_size = y_true.shape[0]

        # For softmax combined with cross-entropy, the error at the logits is
        # simply prediction minus target.
        deltas = [activations[-1] - y_true]

        # Walk from the last hidden layer back to the first, always placing the
        # newly computed error at the front so deltas[k] belongs to layer k + 1.
        for index in range(layer_count - 2, -1, -1):
            next_weights = self.weights[f'weights btw layer{index + 1} to layer{index + 2}']
            propagated = np.dot(deltas[0], next_weights.T) * relu_derivative(z_values[index])
            deltas.insert(0, propagated)

        grad_weights, grad_biases = {}, {}
        for index, delta in zip(range(layer_count), deltas):
            grad_weights[f'weights btw layer{index} to layer{index + 1}'] = (
                np.dot(activations[index].T, delta) / batch_size
            )
            grad_biases[f'bias for layer{index + 1}'] = np.sum(delta, axis=0) / batch_size

        return grad_weights, grad_biases

    def save_gradients_to_csv(self, grad_weights, filename):
        """Write every gradient array to ``filename`` as headerless CSV.

        The arrays are written in dict order with no header or index column,
        and a blank line is written after each one. The dict keys themselves
        are not written.

        Args:
            grad_weights: dict whose values are the arrays to write.
            filename: path of the CSV file to create or overwrite.
        """
        with open(filename, mode='w', newline='') as handle:
            for gradient in grad_weights.values():
                pd.DataFrame(gradient).to_csv(handle, header=False, index=False)
                handle.write('\n')


## Calculating Gradients

In [97]:
# Build the network from the weight and bias dicts loaded from the CSV files.
nn = NeuralNetwork(weights, biases)

In [100]:
# One sample with 14 input values, and a 4-entry target whose last entry is 1.
# NOTE(review): the name `input` shadows Python's built-in input() from here on.
input = np.array([[-1, 1, 1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1]])
y_true = np.array([[0, 0, 0, 1]])

# Forward pass collects activations and pre-activations; the backward pass
# turns them into gradient dicts for the weights and the biases.
activations, z_values = nn.forward(input)
grad_weights, grad_biases = nn.backward(input, y_true, activations, z_values)



In [99]:
# Write the weight gradients to grad_weights.csv; grad_biases is not saved here.
nn.save_gradients_to_csv(grad_weights, 'grad_weights.csv')