# Mini-Batch Example

In [1]:
import numpy as np

In [2]:
# Mini-batches.
# Each X holds one sample per column (rows are the 2 input features), which is
# the layout np.dot(W, X) expects for the (1, 2) weight matrix used below.
# Each Y is a (1, m) row of binary labels so that it lines up element-for-element
# with the (1, m) activations; both label arrays now share that 2-D layout.
X0 = np.array([[1, 2, 4], [-2, -5, -8]])  # batch 0 inputs, shape (2, 3)
Y0 = np.array([[0, 1, 0]])                # batch 0 labels, shape (1, 3)
X1 = np.array([[1, -1], [-2, 2]])         # batch 1 inputs, shape (2, 2)
Y1 = np.array([[1, 1]])                   # batch 1 labels, shape (1, 2)

In [3]:
# Starting point for training
# Weights: one row with an entry per input feature, every entry set to 0.1
W = np.full((1, 2), 0.1)
# Bias: a single value kept as a (1, 1) array so it broadcasts across samples
b = np.full((1, 1), 0.2)
# Step size used by every parameter update
alpha = 0.1

### Epoch 1 Mini-batch 0

In [4]:
# Epoch 1, mini-batch 0 -- forward pass
# Pre-activation: weights applied to the batch, plus the bias (broadcast over samples)
Z0 = b + W.dot(X0)
# Sigmoid activation of the pre-activation values
A0 = 1 / (np.exp(-Z0) + 1)
# Report the intermediate values, one per line
print("\n".join((
    "Forward Propagation: Batch0",
    "Z0: {}".format(Z0),
    "A0: {}".format(A0),
)))

Forward Propagation: Batch0
Z0: [[ 0.1 -0.1 -0.2]]
A0: [[0.52497919 0.47502081 0.450166  ]]


In [5]:
# Back propagation for mini-batch 0 (sigmoid output, cross-entropy loss)
# Batch size is read from the data (one sample per column of X0) instead of
# being hard-coded, so the averaging stays correct if the batch changes.
m0 = X0.shape[1]
# Gradient of the batch-averaged cross-entropy loss w.r.t. the activations
grad_A_L0 = -(Y0/A0 - (1-Y0)/(1-A0))/m0
# Derivative of the sigmoid, written as A - A^2 (= A * (1 - A))
dA0dZ0 = A0 - np.square(A0)
# Chain rule: gradient w.r.t. the pre-activations
grad_Z_L0 = grad_A_L0*dA0dZ0
# Gradient w.r.t. the weights: sum over samples of grad_Z times the inputs
grad_W_L = np.dot(grad_Z_L0,X0.T)
# Gradient w.r.t. the bias: sum of grad_Z over all samples, kept 2-D like b
grad_b_L = np.sum(grad_Z_L0,keepdims=True)
print("Back Propagation: Batch0")
print("grad_A_L0: {}".format(grad_A_L0))
print("dA0dZ0: {}".format(dA0dZ0))
print("grad_Z_L0: {}".format(grad_Z_L0))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

Back Propagation: Batch0
grad_A_L0: [[ 0.70172364 -0.70172364  0.60624358]]
dA0dZ0: [[0.24937604 0.24937604 0.24751657]]
grad_Z_L0: [[ 0.17499306 -0.17499306  0.15005533]]
grad_W_L: [[ 0.42522827 -0.67546349]]
grad_b_L: [[0.15005533]]


In [6]:
# Gradient-descent step after mini-batch 0:
# move each parameter against its gradient, scaled by the learning rate.
W, b = W - grad_W_L * alpha, b - grad_b_L * alpha
# Show the updated parameters
print("\n".join((
    "W: {}".format(W),
    "b: {}".format(b),
)))

W: [[0.05747717 0.16754635]]
b: [[0.18499447]]


### Epoch 1 Mini-batch 1

In [7]:
# Epoch 1, mini-batch 1 -- forward pass with the parameters updated after batch 0
# Pre-activation: weights applied to the batch, plus the bias (broadcast over samples)
Z1 = b + W.dot(X1)
# Sigmoid activation of the pre-activation values
A1 = 1 / (np.exp(-Z1) + 1)
# Report the intermediate values, one per line
print("\n".join((
    "Forward Propagation: Batch1",
    "Z1: {}".format(Z1),
    "A1: {}".format(A1),
)))

Forward Propagation: Batch1
Z1: [[-0.09262106  0.46260999]]
A1: [[0.47686127 0.61363316]]


In [8]:
# Back propagation for mini-batch 1 (sigmoid output, cross-entropy loss)
# Batch size is read from the data (one sample per column of X1) instead of
# being hard-coded, so the averaging stays correct if the batch changes.
m1 = X1.shape[1]
# Gradient of the batch-averaged cross-entropy loss w.r.t. the activations
grad_A_L1 = -(Y1/A1 - (1-Y1)/(1-A1))/m1
# Derivative of the sigmoid, written as A - A^2 (= A * (1 - A))
dA1dZ1 = A1 - np.square(A1)
# Chain rule: gradient w.r.t. the pre-activations
grad_Z_L1 = grad_A_L1*dA1dZ1
# Gradient w.r.t. the weights: sum over samples of grad_Z times the inputs
grad_W_L = np.dot(grad_Z_L1,X1.T)
# Gradient w.r.t. the bias: sum of grad_Z over all samples, kept 2-D like b
grad_b_L = np.sum(grad_Z_L1,keepdims=True)
print("Back Propagation: Batch1")
print("grad_A_L1: {}".format(grad_A_L1))
# Label fixed: this prints the batch-1 sigmoid derivative, not the batch-0 one
print("dA1dZ1: {}".format(dA1dZ1))
print("grad_Z_L1: {}".format(grad_Z_L1))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

Back Propagation: Batch1
grad_A_L1: [[-1.04852297 -0.81481907]]
dA0dZ0: [[0.2494646  0.23708751]]
grad_Z_L1: [[-0.26156936 -0.19318342]]
grad_W_L: [[-0.06838594  0.13677188]]
grad_b_L: [[-0.45475278]]


### Update Rule
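$Update_{epoch=i} = -\alpha\, v_{epoch=i}$

where

+ $v_{epoch=i} = \beta\, v_{epoch=i-1} + \nabla_W L_{epoch=i-1}, \quad v_{epoch=0} = 0$

Note: the update cell below applies this rule with $\beta = 0$ (no momentum), so it reduces to the plain gradient-descent step $W \leftarrow W - \alpha \nabla_W L$ (and likewise for $b$).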

In [9]:
# Gradient-descent step after mini-batch 1:
# move each parameter against its gradient, scaled by the learning rate.
W, b = W - grad_W_L * alpha, b - grad_b_L * alpha
# Show the updated parameters
print("\n".join((
    "W: {}".format(W),
    "b: {}".format(b),
)))

W: [[0.06431577 0.15386916]]
b: [[0.23046975]]
