# Backpropagation for a single layer

In [1]:
import numpy as np

In [2]:
# Sample batch: 3 samples with 4 features each.
sample_inputs = np.array([
    [1.0, 2.0, 3.0, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
])

# Written one row per neuron (3 neurons x 4 inputs), then transposed to
# (4, 3) so that np.dot(sample_inputs, sample_weights) is well-formed.
sample_weights = np.array([
    [0.2, 0.8, -0.5, 1.0],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# One bias per neuron, kept 2-D (1 x 3) so it broadcasts over the batch.
sample_biases = np.array([[2.0, 3.0, 0.5]])

In [None]:
# --- Forward pass: dense layer followed by ReLU ---
layer_outputs = np.dot(sample_inputs, sample_weights) + sample_biases
relu_outputs = np.maximum(0, layer_outputs)

# --- Backward pass ---
# Stand-in for the gradients handed back by the next layer (one row per sample).
dvalues = np.array([
    [1.0, 1.0, 1.0],
    [2.0, 2.0, 2.0],
    [3.0, 3.0, 3.0],
])

# ReLU: the gradient is zeroed wherever the pre-activation was not positive.
drelu = dvalues.copy()
drelu[layer_outputs <= 0] = 0

# Dense layer: the three gradients below do not depend on one another.
dbiases = np.sum(drelu, axis=0, keepdims=True)  # summed over the batch, stays 2-D
dweights = np.dot(sample_inputs.T, drelu)  # same shape as sample_weights
dinputs = np.dot(drelu, sample_weights.T)  # same shape as sample_inputs

In [4]:
# Report every gradient computed in the backward pass, one label per array.
for _label, _gradient in (
    ("Gradients with respect to Inputs : ", dinputs),
    ("Gradients with respect to weights : ", dweights),
    ("Gradients with respect to biases : ", dbiases),
    ("Gradients with respect to relu : ", drelu),
):
    print(_label, _gradient)

Gradients with respect to Inputs :  [[ 0.44 -0.38 -0.07  1.37]
 [-0.12  1.06 -0.66  3.74]
 [ 1.32 -1.14 -0.21  4.11]]
Gradients with respect to weights :  [[ 0.5 -3.5  0.5]
 [20.1 10.1 20.1]
 [10.9 12.9 10.9]
 [ 4.1  0.1  4.1]]
Gradients with respect to biases :  [[6. 4. 6.]]
Gradients with respect to relu :  [[1. 1. 1.]
 [2. 0. 2.]
 [3. 3. 3.]]


# Backpropagation

In [None]:
class LayerDense:
    """Fully connected layer: outputs = inputs . weights + biases."""

    def __init__(self, n_inputs, n_neurons):
        """Create the layer parameters.

        Weights are drawn from np.random.randn and scaled by 0.01, with
        shape (n_inputs, n_neurons); biases start at zero with shape
        (1, n_neurons).
        """
        self.weights = np.random.randn(n_inputs, n_neurons) * 0.01
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Store ``inputs`` for the backward pass and set ``self.outputs``."""
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.outputs = weighted_sum + self.biases

    def backward(self, dvalues):
        """Compute gradients from ``dvalues``, the gradient of the outputs.

        Sets ``dinputs``, ``dbiases`` and ``dweights``; requires that
        ``forward`` has already stored ``self.inputs``.
        """
        # Gradient passed on to the previous layer.
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Each bias contributes to every sample, so sum over the batch axis.
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(self.inputs.T, dvalues)
        
class ActivationRELU:
    """Rectified Linear Unit activation: y = max(0, x)."""

    def forward(self, inputs):
        """Store ``inputs`` and set ``self.outputs`` to max(0, inputs)."""
        self.inputs = inputs
        self.outputs = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Set ``self.dinputs`` to ``dvalues`` with blocked entries zeroed.

        An entry is blocked when the stored forward input was <= 0.
        ``dvalues`` itself is left untouched because a copy is modified.
        """
        blocked = self.inputs <= 0
        gradient = dvalues.copy()
        gradient[blocked] = 0
        self.dinputs = gradient

class ActivationSoftmax:
    """Softmax activation layer - mostly used for classification."""

    def forward(self, inputs):
        """Set ``self.outputs`` to exp(x) / sum(exp(x)), computed per row."""
        # Subtracting each row's maximum keeps every exponent <= 0; the shift
        # cancels in the ratio below.
        row_max = np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(inputs - row_max)
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.outputs = exp_values / row_totals
        
class Loss:
    """Base class for losses; subclasses provide ``forward(output, y)``."""

    def calculate(self, output, y):
        """Return the mean of the per-sample losses given by ``self.forward``."""
        return np.mean(self.forward(output, y))
    
class LossCategoricalCrossEntropy(Loss):
    """ Categorical Cross Entropy Loss """
    def forward(self, y_pred, y_true):
        """ y = -sum(y_true * log(y_pred))

        Return the negative log-likelihood of the correct class for each
        sample.

        y_pred holds one row of class probabilities per sample. y_true is
        either a 1-D array of class indices or a 2-D one-hot array with the
        same layout as y_pred; any array-like is accepted.

        Raises ValueError when y_true is neither 1-D nor 2-D. The previous
        placeholder confidence of 0 made that case silently return inf.
        """
        y_true = np.asarray(y_true)
        n_samples = len(y_pred)
        # Clip on both sides so the log below never sees exactly 0 (or 1).
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # taking out the correct true value from y_pred
        if y_true.ndim == 1:
            correct_confidence = y_pred_clipped[range(n_samples), y_true]
        elif y_true.ndim == 2:
            # this is for one-hot encoded labels
            correct_confidence = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot labels, "
                f"got an array with {y_true.ndim} dimension(s)"
            )

        return -np.log(correct_confidence)
       
# Softmax and cross-entropy loss are combined for optimization
class ActivationSoftmaxLossCategoricalCrossentropy:
    """Softmax activation fused with categorical cross-entropy loss."""

    def __init__(self):
        """Create the wrapped softmax activation and loss objects."""
        self.activation = ActivationSoftmax()
        self.loss = LossCategoricalCrossEntropy()

    def forward(self, inputs, y_true):
        """Run softmax on ``inputs``, keep the result, return the mean loss."""
        self.activation.forward(inputs)
        probabilities = self.activation.outputs
        self.outputs = probabilities
        return self.loss.calculate(probabilities, y_true)

    def backward(self, dvalues, y_true):
        """Set ``self.dinputs`` from ``dvalues`` and the labels.

        Subtracts 1 from the true-class entry of every row of a copy of
        ``dvalues`` and divides the result by the number of samples.
        ``y_true`` may hold class indices (1-D) or one-hot rows (2-D).
        """
        n_samples = len(dvalues)

        # One-hot labels are reduced to class indices first.
        labels = np.argmax(y_true, axis=1) if len(y_true.shape) == 2 else y_true

        gradient = dvalues.copy()
        gradient[range(n_samples), labels] -= 1
        gradient /= n_samples
        self.dinputs = gradient

# Sampling

In [13]:
import nnfs
from nnfs.datasets import spiral_data

# NOTE(review): nnfs.init() presumably fixes the random seed and other numpy
# defaults so the numbers below are reproducible - confirm against the nnfs docs.
nnfs.init()

# Presumably 100 points per class for 3 classes - TODO confirm. X is fed to a
# LayerDense built with n_inputs=2, so it is expected to have 2 feature columns.
X,y = spiral_data(samples=100,classes=3)

# Network: 2 inputs -> dense (3 neurons) -> ReLU -> dense (3 neurons)
# -> combined softmax + cross-entropy.
# Keep this order: each LayerDense draws its weights from np.random on creation.
dense1= LayerDense(2,3)
activation1 = ActivationRELU()
dense2 = LayerDense(3,3)
loss_activation = ActivationSoftmaxLossCategoricalCrossentropy()

In [14]:
# Forward pass: each stage consumes the outputs of the stage before it.
dense1.forward(X)
activation1.forward(dense1.outputs)
dense2.forward(activation1.outputs)

# The combined softmax/cross-entropy step returns the mean loss of the batch.
loss = loss_activation.forward(dense2.outputs, y)
print("loss : ", loss)

# Accuracy: share of samples whose most probable class equals the label.
predictions = loss_activation.outputs.argmax(axis=1)
if y.ndim == 2:
    # One-hot labels are reduced to class indices before comparing.
    y = y.argmax(axis=1)
accuracy = (predictions == y).mean()
print("accuracy : ", accuracy)

loss :  1.0986104
accuracy :  0.34


In [15]:
# backward pass
# Start at the combined softmax/cross-entropy step: it receives the softmax
# outputs and the labels, and stores its result in loss_activation.dinputs.
loss_activation.backward(loss_activation.outputs,y)
# Walk the network in reverse order; every call consumes the dinputs produced
# by the call before it, so the order of these lines must not change.
dense2.backward(loss_activation.dinputs)
activation1.backward(dense2.dinputs)
dense1.backward(activation1.dinputs)

In [16]:
# Show the parameter gradients of both dense layers, weights before biases.
for _layer in (dense1, dense2):
    print(_layer.dweights)
    print(_layer.dbiases)

[[ 1.5766357e-04  7.8368583e-05  4.7324400e-05]
 [ 1.8161038e-04  1.1045573e-05 -3.3096312e-05]]
[[-3.60553473e-04  9.66117223e-05 -1.03671395e-04]]
[[ 5.44109462e-05  1.07411419e-04 -1.61822361e-04]
 [-4.07913431e-05 -7.16780924e-05  1.12469446e-04]
 [-5.30112993e-05  8.58172934e-05 -3.28059905e-05]]
[[-1.0729185e-05 -9.4610732e-06  2.0027859e-05]]
