![image.png](attachment:d75c8ecf-4714-4502-aac6-08349ababe78.png)

In [17]:
import numpy as np 
 
# Upstream gradient handed back by the following layer.
# A single row of ones keeps this worked example easy to check by hand.
dvalues = np.array([[1., 1., 1.]])

# Written one row per neuron (3 neurons, 4 weights each); the trailing
# transpose stores the matrix with one row per input instead.
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# For each input: scale every weight attached to it by the gradient of the
# neuron that weight feeds, then add the products up.
dx0, dx1, dx2, dx3 = (sum(row * dvalues[0]) for row in weights)

dinputs = np.array([dx0, dx1, dx2, dx3])

print(dinputs)

[ 0.44 -0.38 -0.07  1.37]


In [18]:
import numpy as np 
 
# Upstream gradient handed back by the following layer.
# A single row of ones keeps this worked example easy to check by hand.
dvalues = np.array([[1., 1., 1.]])

# Written one row per neuron (3 neurons, 4 weights each); the trailing
# transpose stores the matrix with one row per input instead.
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# A single dot product replaces the per-input multiply-and-sum:
# (neurons,) . (neurons, inputs) -> (inputs,)
dinputs = dvalues[0].dot(weights.T)

print(dinputs)

[ 0.44 -0.38 -0.07  1.37]


In [19]:
# When a batch of gradients is passed in (one row per sample)

In [20]:
import numpy as np 
 
# Upstream gradients handed back by the following layer.
# Three rows with increasing values, so each row of the result is easy to
# tell apart.
dvalues = np.array([
    [1., 1., 1.],
    [2., 2., 2.],
    [3., 3., 3.],
])

# Written one row per neuron (3 neurons, 4 weights each); the trailing
# transpose stores the matrix with one row per input instead.
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# The same dot product now handles every gradient row at once:
# (rows, neurons) . (neurons, inputs) -> (rows, inputs)
dinputs = dvalues.dot(weights.T)

print(dinputs)

[[ 0.44 -0.38 -0.07  1.37]
 [ 0.88 -0.76 -0.14  2.74]
 [ 1.32 -1.14 -0.21  4.11]]


In [21]:
# Putting all of the code together

In [23]:
import numpy as np 
 
# Upstream gradients handed back by the following layer.
# NOTE: defined for continuity with the earlier cells; nothing below reads it.
dvalues = np.array([
    [1., 1., 1.],
    [2., 2., 2.],
    [3., 3., 3.],
])

# Three samples with four features each
inputs = np.array([
    [1, 2, 3, 2.5],
    [2., 5., -1., 2],
    [-1.5, 2.7, 3.3, -0.8],
])

# Written one row per neuron (3 neurons, 4 weights each); the trailing
# transpose stores the matrix with one row per input instead.
weights = np.array([
    [0.2, 0.8, -0.5, 1],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]).T

# One bias per neuron, kept as a row vector of shape (1, neurons)
biases = np.array([[2, 3, 0.5]])

# ---- Forward pass ----
layer_outputs = inputs.dot(weights) + biases   # dense layer
relu_outputs = np.maximum(0, layer_outputs)    # ReLU activation

# ---- Backward pass ----
# ReLU: the activation output doubles as a stand-in for the gradient that
# would arrive from the next layer; it is zeroed wherever the dense output
# was not positive.
drelu = relu_outputs.copy()
drelu[layer_outputs <= 0] = 0

# Dense layer
dinputs = drelu.dot(weights.T)    # gradient w.r.t. the inputs
dweights = inputs.T.dot(drelu)    # gradient w.r.t. the weights
# Gradient w.r.t. the biases: sum over samples (axis 0); keepdims keeps the
# (1, neurons) row shape so it lines up with `biases`.
dbiases = drelu.sum(axis=0, keepdims=True)

# ---- Parameter update ----
# One step against the gradient with a step size of 0.001
weights -= 0.001 * dweights
biases -= 0.001 * dbiases

print(weights)
print(biases)

[[ 0.179515   0.5003665 -0.262746 ]
 [ 0.742093  -0.9152577 -0.2758402]
 [-0.510153   0.2529017  0.1629592]
 [ 0.971328  -0.5021842  0.8636583]]
[[1.98489  2.997739 0.497389]]


In [24]:
# Inspect drelu, the ReLU-masked gradient left in the kernel by the
# backpropagation cell above (this cell relies on that cell having run).
print(drelu)

[[4.8   1.21  2.385]
 [8.9   0.    0.2  ]
 [1.41  1.051 0.026]]


In [25]:
# Inspect dbiases, i.e. drelu summed over the sample axis with keepdims=True
# (this cell relies on the backpropagation cell above having run).
print(dbiases)

[[15.11   2.261  2.611]]


In [26]:
# The same logic written with objects (classes)

In [28]:
# Fully connected (dense) layer
class Layer_Dense:
    """Dense layer computing ``inputs . weights + biases``.

    ``forward`` stores its input so that ``backward`` can later derive the
    gradients with respect to the weights, the biases and the inputs.
    """

    def __init__(self, inputs, neurons):
        """Create the parameters for ``inputs`` features and ``neurons`` units.

        Weights are drawn from a standard normal distribution and scaled by
        0.01, giving shape (inputs, neurons); biases start at zero with
        shape (1, neurons).
        """
        self.biases = np.zeros((1, neurons))
        self.weights = 0.01 * np.random.randn(inputs, neurons)

    def forward(self, inputs):
        """Compute the layer output and remember ``inputs`` for ``backward``."""
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.output = weighted_sum + self.biases

    def backward(self, dvalues):
        """Turn the upstream gradient ``dvalues`` into this layer's gradients.

        Sets ``dinputs`` (gradient w.r.t. the layer input), ``dweights`` and
        ``dbiases``. ``forward`` must have been called first, because the
        stored input is needed for ``dweights``.
        """
        # Gradient on the input values, passed on to the previous layer
        self.dinputs = np.dot(dvalues, self.weights.T)
        # Gradients on the parameters; biases sum over the sample axis and
        # keep the (1, neurons) row shape
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(self.inputs.T, dvalues)
    
 
# Rectified linear unit activation
class Activation_ReLU:
    """ReLU activation: ``output = max(0, input)`` element-wise."""

    def forward(self, inputs):
        """Apply ReLU and remember ``inputs`` for the backward pass."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Pass the upstream gradient through where the input was positive.

        ``dvalues`` itself is left untouched; the result is stored in
        ``self.dinputs``.
        """
        # Positions whose forward input was zero or negative pass no gradient
        blocked = self.inputs <= 0

        # Work on a copy so the caller's gradient array is not modified
        self.dinputs = dvalues.copy()
        self.dinputs[blocked] = 0

In [30]:
# Base class shared by the loss functions
class Loss:
    """Common loss interface.

    Subclasses supply ``forward(output, y)``, which returns the per-sample
    losses; this base class only averages them.
    """

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        ``output`` is the model output and ``y`` the ground-truth values;
        both are handed unchanged to the subclass's ``forward``.
        """
        # Per-sample losses come from the concrete loss; the data loss is
        # their mean
        return np.mean(self.forward(output, y))
# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy loss; only the backward pass is shown here."""

    # NOTE(review): the forward pass is elided in this cell. Confirm it is
    # defined before Loss.calculate() is called on this class.
    ...

    # Backward pass
    def backward(self, dvalues, y_true):
        """Compute the gradient of the loss w.r.t. the predicted values.

        Parameters
        ----------
        dvalues : array-like, shape (samples, labels)
            The predicted values the loss was evaluated on.
        y_true : array-like
            Either sparse class indices, shape (samples,), or one-hot /
            per-class targets, shape (samples, labels).

        Result
        ------
        Stores ``-y_true / dvalues / samples`` in ``self.dinputs``.
        Positions where both the target and the prediction are exactly 0
        get a gradient of 0 instead of the NaN that 0/0 would produce.
        A prediction of 0 for a non-zero target still yields an infinite
        gradient (and NumPy's divide-by-zero warning).
        """
        # Accept plain lists as well as arrays
        dvalues = np.asarray(dvalues)
        y_true = np.asarray(y_true)

        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample, counted on the first sample
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot vectors
        if y_true.ndim == 1:
            y_true = np.eye(labels)[y_true]

        # Calculate gradient. Only the "invalid" (0/0) warning is silenced,
        # because those positions are repaired on the next lines.
        with np.errstate(invalid="ignore"):
            gradient = -y_true / dvalues
        # A zero target contributes nothing to the loss, so its gradient is
        # 0 even when the prediction underflowed to exactly 0.
        undefined = (y_true == 0) & (dvalues == 0)
        gradient[undefined] = 0

        # Normalize gradient by the number of samples
        self.dinputs = gradient / samples

In [36]:
# Softmax activation
class Activation_Softmax:
    """Softmax activation; only the backward pass is shown here."""

    # NOTE(review): backward() reads self.output, which nothing in this cell
    # sets. Presumably a forward pass defined elsewhere stores it; confirm.

    # Backward pass
    def backward(self, dvalues):
        """Compute the gradient w.r.t. the softmax inputs.

        Parameters
        ----------
        dvalues : array-like, shape (samples, classes)
            Upstream gradient, one row per row of ``self.output``.

        Result
        ------
        Stores the per-sample product of the softmax Jacobian and the
        upstream gradient in ``self.dinputs``. The result keeps the dtype of
        ``dvalues`` when that is a floating (or complex) type and is float64
        otherwise, so integer upstream gradients are not truncated.
        """
        dvalues = np.asarray(dvalues)

        # empty_like would otherwise inherit an integer dtype from dvalues
        # and silently truncate the float gradients written into it.
        if np.issubdtype(dvalues.dtype, np.inexact):
            grad_dtype = dvalues.dtype
        else:
            grad_dtype = np.float64

        # Create uninitialized array; every row is filled in the loop below
        self.dinputs = np.empty_like(dvalues, dtype=grad_dtype)

        # Enumerate outputs and gradients
        for index, (single_output, single_dvalues) in enumerate(zip(self.output, dvalues)):
            # Reshape the output row into a column vector
            single_output = single_output.reshape(-1, 1)
            # Jacobian of the softmax output w.r.t. its input:
            # diag(s) - s . s^T
            jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)
            # Calculate sample-wise gradient
            # and add it to the array of sample gradients
            self.dinputs[index] = np.dot(jacobian_matrix,
                                         single_dvalues)
