In [14]:
import numpy as np
import random
# A Layer of width J acts on an input vector X of size K, using its associated weight matrix W of shape (J, K)
# and a bias vector B of length J.
class Layer:
    """Fully connected sigmoid layer mapping a size-K input vector to J activations.

    Attributes:
        J: Number of neurons in the layer (its width).
        K: Size of the input vector.
        W: Weight matrix of shape (J, K), drawn from a standard normal.
        B: Bias vector of length J, drawn from a standard normal.
        Z: Last weighted input ``W . X + B``; 0 until the first forward pass.
        A: Last activation ``sigmoid(Z)``; 0 until the first forward pass.
        g_l: Error of the layer (gradient of the cost w.r.t. Z); initialised to 0
            and meant to be filled in by backpropagation.
        sig_deriv: Sigmoid derivative stored during the last forward pass.
        last_layer: True if this is the output layer; initialised to False.
    """

    def __init__(self, J, K, seed=2):
        """Create a layer with J neurons acting on inputs of size K.

        Args:
            J: Number of neurons (width of the layer).
            K: Size of the input vector.
            seed: Value passed to ``np.random.seed`` before drawing W and B.
                Defaults to 2. Pass ``None`` to leave NumPy's global generator
                untouched and draw from its current state instead.
        """
        # NOTE: np.random.seed resets NumPy's *global* generator. With the default
        # seed every Layer restarts the same stream, so two layers of equal shape
        # receive identical W and B, and later np.random calls are deterministic.
        if seed is not None:
            np.random.seed(seed)
        self.J = J
        self.K = K
        # W is drawn before B; keep this order so seeded results stay reproducible.
        self.W = np.random.normal(0, 1, (J, K))
        self.B = np.random.normal(0, 1, J)
        self.Z = 0
        self.A = 0
        self.g_l = 0
        self.sig_deriv = 0
        self.last_layer = False

    def forward_prop(self, X):
        """Apply the layer to X and cache Z, A and the sigmoid derivative.

        Args:
            X: Input vector (previous layer's activations or the network input)
                holding exactly K elements.

        Returns:
            The activation vector ``sigmoid(W . X + B)``.

        Raises:
            ValueError: If X does not contain exactly K elements.
        """
        X = np.asarray(X)
        # Fail loudly: returning an error string here would be passed on as the
        # "activation" to the next layer and blow up far from the real cause.
        if X.size != self.K:
            raise ValueError("Error: Input Vector Dimensions do not match weight matrix")
        # Weighted input Z
        self.Z = np.dot(self.W, X) + self.B
        # Non-linear activation (sigmoid) of Z
        self.A = self.sigmoid()
        # Stored so it can be reused when the error is propagated backwards
        self.sig_deriv = self.deriv_sigmoid()
        return self.A

    def output_error(self, y_train):
        """Return the output-layer error for one training example.

        Uses the cost ``C = 0.5 * (y_train - A)^2``, whose gradient w.r.t. A is
        ``-(y_train - A)``; the error is its Hadamard product with sigmoid'(Z).
        Relies on ``self.A`` from the most recent forward pass.

        Args:
            y_train: Desired output vector.

        Returns:
            Gradient of the cost with respect to this layer's weighted input Z.
        """
        sigmoid_derivative = self.deriv_sigmoid()
        Gradient_C = -1 * (y_train - self.A)
        # Hadamard (element-wise) product
        return sigmoid_derivative * Gradient_C

    def sigmoid(self):
        """Return the sigmoid of the stored weighted input ``self.Z``."""
        return 1.0 / (1.0 + np.exp(-1 * self.Z))

    def deriv_sigmoid(self):
        """Return the sigmoid derivative ``A * (1 - A)`` from the stored activation."""
        return self.A * (1 - self.A)

#NeuralNet: Class which defines how the individual layer object relate to each other. 
class NeuralNet:
    """Feed-forward network: an ordered list of layers trained with backpropagation.

    Attributes:
        layers: Layer objects ordered from the first hidden layer to the output layer.
    """

    def __init__(self, layer_sizes):
        """Build the layers from a list of sizes.

        Args:
            layer_sizes: Sizes where index 0 is the input vector size and the last
                index is the output vector size. Each adjacent pair defines one layer.
        """
        self.layers = []
        output_index = len(layer_sizes) - 1
        for i in range(1, len(layer_sizes)):
            # Width is layer_sizes[i]; the input size is the previous layer's width.
            layer = Layer(layer_sizes[i], layer_sizes[i - 1])
            layer.last_layer = (i == output_index)
            self.layers.append(layer)

    def forward_pass(self, X):
        """Propagate X through every layer, caching Z and A on each layer.

        Args:
            X: Network input vector.

        Returns:
            The activation produced by the final layer (X itself if there are no layers).
        """
        activation = X
        for layer in self.layers:
            activation = layer.forward_prop(activation)
        return activation

    def backward_pass(self, Y_train, X):
        """Compute per-layer gradients for one example via backpropagation.

        Must be called after ``forward_pass(X)`` because it reads the activations
        and sigmoid derivatives cached on the layers. Each layer's ``g_l`` is
        overwritten with its error.

        Args:
            Y_train: Desired output vector for this example.
            X: The input vector that was fed to ``forward_pass``.

        Returns:
            ``(gradients_bias, gradients_weights)``: two lists indexed like
            ``self.layers`` holding the cost gradient w.r.t. each layer's bias
            vector and weight matrix.
        """
        gradients_bias = []
        gradients_weights = []

        # Walk from the output layer back to the first layer.
        for i in range(len(self.layers) - 1, -1, -1):
            layer = self.layers[i]
            if layer.last_layer:
                error = layer.output_error(Y_train)
            else:
                # Pull the following layer's error back through its weights,
                # then scale by this layer's sigmoid derivative.
                following = self.layers[i + 1]
                error = np.dot(following.W.T, following.g_l)
                error = error * layer.sig_deriv
            # Stored so the preceding layer can use it on the next iteration.
            layer.g_l = error

            # The first layer's input is X; every other layer's input is the
            # previous layer's activation.
            layer_input = self.layers[i - 1].A if i != 0 else X

            # Z depends on B by plain addition, so dC/dB equals the layer error.
            gradients_bias.append(error)
            gradients_weights.append(np.outer(error, layer_input))

        # Collected back-to-front; reverse so index k matches self.layers[k].
        gradients_bias.reverse()
        gradients_weights.reverse()
        return gradients_bias, gradients_weights

    def training(self, Training_data, Batchsize, learning_rate, epochs):
        """Run ``epochs`` mini-batch updates and print a completion message.

        Args:
            Training_data: List of ``(X, Y_train)`` tuples.
            Batchsize: Number of examples sampled for each update.
            learning_rate: Gradient-descent step size.
            epochs: Number of calls to ``training_epoch``.
        """
        for _ in range(epochs):
            self.training_epoch(Training_data, Batchsize, learning_rate)
        print("Training Complete")

    def training_epoch(self, Training_Data, Batchsize, learning_rate):
        """Perform one gradient-descent step on a randomly sampled mini-batch.

        Args:
            Training_Data: List of ``(X, Y_train)`` tuples.
            Batchsize: Number of examples to sample without replacement; clamped
                to ``len(Training_Data)`` when larger.
            learning_rate: Gradient-descent step size.

        Raises:
            ValueError: If ``Training_Data`` is empty or ``Batchsize`` is not positive.
        """
        if len(Training_Data) == 0:
            raise ValueError("Training_Data must contain at least one (X, Y_train) pair")
        if Batchsize <= 0:
            raise ValueError("Batchsize must be a positive integer")
        if Batchsize > len(Training_Data):
            print("Batchsize larger than Training Data size. Adjusting Batchsize to Training Data size.")
            Batchsize = len(Training_Data)

        # Draw the mini-batch without replacement.
        Batch = random.sample(Training_Data, Batchsize)

        # One entry per sample; each entry is a per-layer list of gradients.
        sample_weight_grads = []
        sample_bias_grads = []
        for member in Batch:
            X_data = member[0]
            Y_Train = member[1]
            self.forward_pass(X_data)
            gradient_bias, gradient_weights = self.backward_pass(Y_Train, X_data)
            sample_weight_grads.append(gradient_weights)
            sample_bias_grads.append(gradient_bias)

        for layer_index, layer in enumerate(self.layers):
            # np.mean builds a new array, so the per-sample gradients (which alias
            # the layers' g_l arrays) are never modified in place.
            avg_weight_grad = np.mean(
                [grads[layer_index] for grads in sample_weight_grads], axis=0
            )
            avg_bias_grad = np.mean(
                [grads[layer_index] for grads in sample_bias_grads], axis=0
            )
            layer.W = layer.W - learning_rate * avg_weight_grad
            layer.B = layer.B - learning_rate * avg_bias_grad
            

In [11]:
# Test case for forward propagation through a single Layer.
# Build a 5-neuron layer on a 4-element input and overwrite W with ones so that
# Z is simply 4 + B and can be checked by hand.
L1 = Layer(5, 4)
L1.W = np.ones((5, 4))
X_Test = np.ones(4)

# Manual calculations: the same formulas the layer uses, computed independently
Z_Test = np.dot(L1.W, X_Test) + L1.B
A_Test = 1.0 / (1.0 + np.exp(-1 * Z_Test))

# Print initial parameters
print("Weights (W):")
print(L1.W)
print("Input Vector (X_Test):")
print(X_Test)
print("Biases (B):")
print(L1.B)

# Print manual calculations
print("Manual Calculation of Z (Z_Test):")
print(Z_Test)
print("Manual Calculation of Activation (A_Test):")
print(A_Test)

# Forward propagation in the layer (stores Z and A on L1)
L1.forward_prop(X_Test)

# Print layer output
print("Layer Output Z (L1.Z):")
print(L1.Z)
print("Layer Output Activation (L1.A):")
print(L1.A)

# Enforce the comparison instead of relying on reading the printed arrays.
assert np.allclose(L1.Z, Z_Test), "Layer Z differs from the manual calculation"
assert np.allclose(L1.A, A_Test), "Layer activation differs from the manual calculation"


Weights (W):
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
Input Vector (X_Test):
[1. 1. 1. 1.]
Biases (B):
[-0.87810789 -0.15643417  0.25657045 -0.98877905 -0.33882197]
Manual Calculation of Z (Z_Test):
[3.12189211 3.84356583 4.25657045 3.01122095 3.66117803]
Manual Calculation of Activation (A_Test):
[0.95778679 0.97903198 0.98602719 0.95307849 0.97494183]
Layer Output Z (L1.Z):
[3.12189211 3.84356583 4.25657045 3.01122095 3.66117803]
Layer Output Activation (L1.A):
[0.95778679 0.97903198 0.98602719 0.95307849 0.97494183]


In [24]:

# Gradient check: compare one backprop weight gradient against a finite difference.
# Initialize the neural network with a larger architecture
nn = NeuralNet([3, 5, 4, 7, 1])

# Parameters for the test
epsilon = 0.0001            # Step size of the forward difference
X_Test = np.random.rand(3)  # Random test input with 3 features
Y_Train = np.array([1])     # Expected output

# Forward and backward passes (backward_pass reads the values cached by forward_pass)
nn.forward_pass(X_Test)
gradients_bias, gradients_weights = nn.backward_pass(Y_Train, X_Test)

# Copy the first layer's weights and calculate the initial cost.
# NOTE: the copy is perturbed below, so from then on `old_weights` holds the
# perturbed matrix rather than the original one.
old_weights = nn.layers[0].W.copy()
cost_nn = 0.5 * (Y_Train - nn.layers[-1].A) ** 2

# Choose a specific weight in the first layer to perturb; adjust indices as needed
weight_index = (0, 1)

W_test = old_weights[weight_index]
W_perturbed = W_test + epsilon
old_weights[weight_index] = W_perturbed

# Perform a forward pass with the perturbed weight.
# The network keeps the perturbed weight after this cell.
nn.layers[0].W = old_weights
nn.forward_pass(X_Test)

# Calculate the new cost with the perturbed weight
cost_new_nn = 0.5 * (Y_Train - nn.layers[-1].A) ** 2

# Approximate the gradient numerically (forward difference)
gradient_approx = (cost_new_nn - cost_nn) / epsilon

# Extract the corresponding analytically computed gradient
gradient_analytical = gradients_weights[0][weight_index]

# Compare both gradients
print("Numerical Gradient:", gradient_approx)
print("Analytical Gradient:", gradient_analytical)
print("Difference:", gradient_approx - gradient_analytical)

# Enforce agreement; the tolerance leaves room for the forward difference's
# truncation error.
assert np.allclose(gradient_approx, gradient_analytical, rtol=1e-3, atol=1e-6), \
    "Backprop gradient disagrees with the finite-difference estimate"


Numerical Gradient: [-0.00025252]
Analytical Gradient: -0.0002525163705311497
Difference: [-2.51527043e-09]


In [25]:
# Test case for backpropagation when the output layer is the only layer.
# Simple network: input of size 2, output layer with 1 neuron.
# The expected values can be double-checked on paper.
nn = NeuralNet([2, 1])

# Set weights and biases to known values for simplicity
nn.layers[0].W = np.array([[0.5, -0.5]])  # Weights of the single layer
nn.layers[0].B = np.array([0.0])          # Bias of the single layer

# Test input and expected output
X_Test = np.array([1, 0])  # Test input
Y_Train = np.array([1])    # Expected output

# Perform forward pass
nn.forward_pass(X_Test)

# Manually calculate the output layer error for a sigmoid activation and the
# cost C = 0.5 * (y - a)^2: dC/dZ = (a - y) * a * (1 - a)
last_layer = nn.layers[-1]
Z_Test = last_layer.Z
A_Test = last_layer.A
Error_Test = (A_Test - Y_Train) * A_Test * (1 - A_Test)
# The bias gradient equals the error; the weight gradient is its outer product with the input
Gradient_B_Test = Error_Test
Gradient_W_Test = np.outer(Error_Test, X_Test)

# Perform backward pass
gradients_bias, gradients_weights = nn.backward_pass(Y_Train, X_Test)

# Enforce the comparison instead of relying on reading the displayed values.
assert np.allclose(last_layer.g_l, Error_Test), "Output error differs from manual value"
assert np.allclose(gradients_bias[0], Gradient_B_Test), "Bias gradient differs from manual value"
assert np.allclose(gradients_weights[0], Gradient_W_Test), "Weight gradient differs from manual value"

# Collect the results; the bare expression on the last line displays them
results = {
    "Manual Calculation of Output Layer Error": Error_Test,
    "Manual Calculation of Output Layer Bias gradient": Gradient_B_Test,
    "Manual Calculation of Output Layer W gradient": Gradient_W_Test,
    "Backpropagation Output Layer Error (g_l)": last_layer.g_l,
    "Calculated Gradients (Bias and Weights) from Backpropagation": {
        "Bias Gradient": gradients_bias[0],
        "Weights Gradient": gradients_weights[0]
    }
}

results



{'Manual Calculation of Output Layer Error': array([-0.08872346]),
 'Manual Calculation of Output Layer Bias gradient': array([-0.08872346]),
 'Manual Calculation of Output Layer W gradient': array([[-0.08872346, -0.        ]]),
 'Backpropagation Output Layer Error (g_l)': array([-0.08872346]),
 'Calculated Gradients (Bias and Weights) from Backpropagation': {'Bias Gradient': array([-0.08872346]),
  'Weights Gradient': array([[-0.08872346, -0.        ]])}}