# Basic Backpropagation (Chain Rule)

# Single Neuron 

In [1]:
# Example neuron with three inputs: `x` holds the input values, `w` holds one
# weight per input, and `b` is the scalar bias added to the weighted sum.
x, w = [-1.0, 2.0, -3.0], [6.0, 8.0, -2.0]
b = 1.0

In [2]:
# Forward pass, step 1: multiply each input by its matching weight.
xw0, xw1, xw2 = x[0] * w[0], x[1] * w[1], x[2] * w[2]

In [3]:
# Forward pass, step 2: add up the weighted inputs, then add the bias.
# The left-to-right summation order is kept so the float result is unchanged.
z = (xw0 + xw1 + xw2) + b

In [4]:
# Forward pass, step 3 (ReLU): a negative pre-activation is replaced by 0,
# any other value of z passes through unchanged.
y = 0 if z < 0 else z
print(y)

17.0


In [5]:
# Gradient handed back to this neuron from the layers that follow it
# ("further layers"); fixed to 1.0 in this example. It is the starting factor
# of the chain-rule products computed in the next cells.
dvalue = 1.0

In [6]:
# Chain rule through ReLU: its local derivative is 1 when z > 0 and 0
# otherwise, and that factor scales the incoming gradient `dvalue`.
if z > 0:
    drelu_dz = dvalue * 1.0
else:
    drelu_dz = dvalue * 0
print(drelu_dz)

1.0


## Summation:
### f(x, y) = x + y --> $\frac{\partial \mathbf{f(x,y)}}{\partial x}$ = $\frac{\partial \mathbf{f(x,y)}}{\partial y}$ = 1

## Multiplication
### f(x, y) = x · y --> $\frac{\partial \mathbf{f(x,y)}}{\partial x}$ = y
### &emsp; &emsp; &emsp; &emsp; &emsp; &ensp; $\frac{\partial \mathbf{f(x,y)}}{\partial y}$ = x

In [7]:
# The local derivative of a sum with respect to any one of its operands is 1,
# so every weighted input and the bias get the same local factor.
dsum_dxw0 = dsum_dxw1 = dsum_dxw2 = dsum_db = 1

# Chain rule: gradient arriving at z times the local derivative of the sum.
drelu_dxw0, drelu_dxw1, drelu_dxw2, drelu_db = (
    drelu_dz * local_derivative
    for local_derivative in (dsum_dxw0, dsum_dxw1, dsum_dxw2, dsum_db)
)
print(drelu_dxw0, drelu_dxw1, drelu_dxw2, drelu_db)

1.0 1.0 1.0 1.0


In [8]:
# Local derivatives of each product x[i] * w[i]: with respect to the input it
# is the weight, and with respect to the weight it is the input.
dmul_dx0, dmul_dx1, dmul_dx2 = w[0], w[1], w[2]
dmul_dw0, dmul_dw1, dmul_dw2 = x[0], x[1], x[2]

# Chain rule: gradient arriving at each product times its local derivative.
drelu_dx0, drelu_dw0 = drelu_dxw0 * dmul_dx0, drelu_dxw0 * dmul_dw0
drelu_dx1, drelu_dw1 = drelu_dxw1 * dmul_dx1, drelu_dxw1 * dmul_dw1
drelu_dx2, drelu_dw2 = drelu_dxw2 * dmul_dx2, drelu_dxw2 * dmul_dw2
print(drelu_dx0, drelu_dw0, drelu_dx1, drelu_dw1, drelu_dx2, drelu_dw2)

6.0 -1.0 8.0 2.0 -2.0 -3.0


## Simplification

In [9]:
# The sum contributes a factor of 1, so the whole chain collapses to
# (incoming gradient) * (ReLU derivative) * (the other operand of the product).
# The first two factors are shared by all six gradients; compute them once.
relu_grad = dvalue * (1.0 if z > 0 else 0)

# Gradients with respect to the inputs use the weights ...
drelu_dx0, drelu_dx1, drelu_dx2 = relu_grad * w[0], relu_grad * w[1], relu_grad * w[2]
# ... and gradients with respect to the weights use the inputs.
drelu_dw0, drelu_dw1, drelu_dw2 = relu_grad * x[0], relu_grad * x[1], relu_grad * x[2]
print(drelu_dx0, drelu_dw0, drelu_dx1, drelu_dw1, drelu_dx2, drelu_dw2)

6.0 -1.0 8.0 2.0 -2.0 -3.0


## Gradients 

In [10]:
# Collect the per-element results into gradient lists that line up
# index-for-index with `x` and `w`; the bias gradient stays a scalar.
dx, dw = (
    [drelu_dx0, drelu_dx1, drelu_dx2],
    [drelu_dw0, drelu_dw1, drelu_dw2],
)
db = drelu_db

In [11]:
# Show the input, weight and bias gradients on one line.
print(f"{dx} {dw} {db}")

[6.0, 8.0, -2.0] [-1.0, 2.0, -3.0] 1.0


## Optimization Demo 

In [12]:
# Weights and bias before the update step, for later comparison.
print(f"{w} {b}")

[6.0, 8.0, -2.0] 1.0


In [13]:
# Move every parameter a small step against its gradient. The step size is
# named once instead of being repeated on each line.
# Note: this changes `w` and `b` in place, so re-running the cell applies
# another step on top of the previous one.
learning_rate = 0.001
for i, grad in enumerate(dw):
    w[i] += -learning_rate * grad
b += -learning_rate * db

In [14]:
# Weights and bias after the update step.
print(f"{w} {b}")

[6.001, 7.998, -1.997] 0.999


### Forward pass (updated weights and bias)

In [15]:
# Repeat the forward pass with the current weights: multiply each input by
# its matching weight.
xw0, xw1, xw2 = x[0] * w[0], x[1] * w[1], x[2] * w[2]

In [16]:
# Add up the weighted inputs, then add the current bias. The left-to-right
# summation order is kept so the float result is unchanged.
z = (xw0 + xw1 + xw2) + b

In [17]:
# ReLU on the new pre-activation: a negative z is replaced by 0, any other
# value passes through unchanged. Compare the result with the first pass.
y = 0 if z < 0 else z
print(y)

16.985
