# Basic Backpropagation (Chain rule) 

## Multiple Neurons

### Single Sample 

In [1]:
import numpy as np

In [2]:
# Upstream gradient for the single-sample example: one row of three ones.
dvalues = np.ones(shape=(1, 3), dtype=np.float64)

In [3]:
# Draw a random (3, 4) matrix and transpose it, so `weights` has shape (4, 3).
weights = np.random.standard_normal(size=(3, 4)).transpose()
# Show the first row of the transposed matrix (three values).
print(weights[0])

[ 1.04481078 -1.72122044 -1.0619461 ]


In [4]:
# Partial derivatives for the first four rows of `weights`: each dxi is the
# dot product of row i with the upstream gradient of the single sample.
# (Presumably rows of `weights` index the layer's inputs - confirm against
# how `weights` is built above.)
# A single matrix-vector product replaces four copy-pasted lines that used
# Python's built-in sum() on NumPy arrays.
dx0, dx1, dx2, dx3 = np.dot(weights[:4], dvalues[0])

# Sanity-check the operand shapes used in the products above.
print(dvalues[0].shape, weights.shape)

(3,) (4, 3)


#### Derivatives w.r.t Inputs  

In [5]:
# Vectorised form of the per-row sums: multiply the single sample's upstream
# gradient by the transposed weight matrix in one dot product.
upstream_row = dvalues[0]
dinputs = upstream_row.dot(weights.T)

print(dinputs)

[-1.73835576 -1.26031036 -3.07298364 -1.8827701 ]


### Batch of Samples 

In [6]:
# Upstream gradient for the batch example: a fixed 3x3 float array whose rows
# are all-ones, all-twos and all-threes, so results are easy to check by hand.
# (The earlier random initialisation was overwritten immediately and has been
# removed as dead code.)
dvalues = np.array([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [3.0, 3.0, 3.0]])
print(dvalues)

[[1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


#### Derivatives w.r.t Inputs 

In [7]:
# Same product as the single-sample case, applied to the whole batch at once:
# one output row per row of `dvalues`.
weights_t = weights.T
dinputs = dvalues.dot(weights_t)
print(dinputs)

[[-1.73835576 -1.26031036 -3.07298364 -1.8827701 ]
 [-3.47671151 -2.52062073 -6.14596728 -3.7655402 ]
 [-5.21506727 -3.78093109 -9.21895092 -5.6483103 ]]


In [8]:
# Random (3, 4) array of standard-normal values used as the layer inputs.
inputs = np.random.standard_normal(size=(3, 4))
print(inputs)

[[ 0.13379076 -0.54080681  1.36572556  1.43359551]
 [ 0.55574981  1.20787588  0.6849298  -1.83143634]
 [ 0.6653529  -1.06750454  0.074401   -1.11193225]]


#### Derivatives w.r.t Weights 

In [9]:
# Weight gradient: transposed inputs times the upstream gradient.
inputs_t = inputs.T
dweights = inputs_t.dot(dvalues)

In [10]:
# Display the weight gradient computed in the previous cell.
print(dweights)

[[ 3.2413491   3.2413491   3.2413491 ]
 [-1.32756868 -1.32756868 -1.32756868]
 [ 2.95878815  2.95878815  2.95878815]
 [-5.5650739  -5.5650739  -5.5650739 ]]


#### Derivatives w.r.t Biases 

In [11]:
# Bias values stored as a single row, shape (1, 3).
biases = np.array([2, 3, 0.5]).reshape(1, -1)

In [12]:
# Bias gradient: sum the upstream gradient down the rows (axis 0), keeping the
# result two-dimensional as a single row.
dbiases = dvalues.sum(axis=0, keepdims=True)
print(dbiases)

[[6. 6. 6.]]


#### Derivatives Through the ReLU Activation

In [13]:
# Random integers in [-8, 5) with shape (3, 4); these are the values the ReLU
# mask below is computed from.
z = np.random.randint(low=-8, high=5, size=(3, 4))
print(z)

[[ 4  4  0  4]
 [-4 -7 -2 -6]
 [ 2 -2 -6  1]]


In [14]:
# Random integer upstream gradient in [0, 10), same (3, 4) shape as `z`.
dvalues = np.random.randint(low=0, high=10, size=(3, 4))
print(dvalues)

[[1 4 0 1]
 [3 3 4 4]
 [3 5 2 8]]


In [15]:
# ReLU backward pass: the upstream gradient passes through unchanged where
# z is positive and is zeroed wherever z <= 0. `dvalues` itself is not modified.
drelu = np.where(z <= 0, 0, dvalues)

print(drelu)

[[1 4 0 1]
 [0 0 0 0]
 [3 0 0 8]]
