# Problem 6.1.2
The Jupyter notebook [IntroML/Examples/Chapter6/MiniBatchGradientDescent.ipynb](https://github.com/satishchandrareddy/IntroML/blob/master/Examples/Chapter6/MiniBatch.ipynb) performs mini-batch gradient descent using the following split of X and Y:


$X = \begin{bmatrix} 1 & 2 & 4 & 1 & -1\\ -2 & -5 & -8 & -2 & 2 \end{bmatrix}, Y = \begin{bmatrix} 0 & 1 & 0 & 1 & 1\end{bmatrix}$

$X_{minibatch=0} = \begin{bmatrix} 1 & 2 & 4 \\ -2 & -5 & -8\end{bmatrix}, Y_{minibatch=0} = \begin{bmatrix} 0 & 1 & 0 \end{bmatrix}$

$X_{minibatch=1} = \begin{bmatrix} 1 & -1\\ -2 & 2 \end{bmatrix}, Y_{minibatch=1} = \begin{bmatrix} 1 & 1\end{bmatrix}$

Redo one epoch of mini-batch gradient descent, this time processing mini-batch 1 first and mini-batch 0 second. Show that the order of the mini-batches does indeed affect the final values of $W$ and $b$ after one epoch.

In [1]:
import numpy as np

In [2]:
# Mini-batches.
# Each X stores one sample per column (2 features x m samples).
# Each Y is a (1, m) row vector of labels, so it has the same shape as the
# activations computed from np.dot(W, X) + b and no broadcasting from a 1-D
# array is needed.
X0 = np.array([[1, 2, 4], [-2, -5, -8]])
Y0 = np.array([[0, 1, 0]])
X1 = np.array([[1, -1], [-2, 2]])
Y1 = np.array([[1, 1]])

# learning rate
alpha = 0.1

## Original Order:

In [3]:
# One epoch of mini-batch gradient descent in the original order:
# mini-batch 0 first, then mini-batch 1. Each mini-batch performs a forward
# pass, a backward pass, and one parameter update, so the second mini-batch
# starts from the parameters produced by the first.

# initial parameters
W = np.array([[0.1,0.1]])
b = np.array([[0.2]])

# Epoch 1, Mini-batch 0
# Forward propagation: linear step followed by the sigmoid activation
Z0 = np.dot(W,X0) + b
A0 = 1/(1+np.exp(-Z0))
print("Forward Propagation: Minibatch0")
print("Z0: {}".format(Z0))
print("A0: {}".format(A0))

# Back Propagation
# Number of samples (columns) in this mini-batch; derived from the data
# instead of being hard-coded so the averaging always matches the batch.
m0 = X0.shape[1]
# Derivative of the binary cross-entropy loss w.r.t. A, averaged over m0 samples
grad_A_L0 = -(Y0/A0 - (1-Y0)/(1-A0))/m0
# Sigmoid derivative: A - A^2 = A*(1 - A)
dA0dZ0 = A0 - np.square(A0)
# Chain rule: dL/dZ = dL/dA * dA/dZ (element-wise)
grad_Z_L0 = grad_A_L0*dA0dZ0
grad_W_L = np.dot(grad_Z_L0,X0.T)
# keepdims=True keeps the bias gradient 2-D, like b
grad_b_L = np.sum(grad_Z_L0,keepdims=True)
print("Back Propagation: Minibatch0")
print("grad_A_L0: {}".format(grad_A_L0))
print("dA0dZ0: {}".format(dA0dZ0))
print("grad_Z_L0: {}".format(grad_Z_L0))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b (gradient descent step with learning rate alpha)
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))


# Epoch 1, Mini-batch 1
# Forward propagation, using the W and b just updated by mini-batch 0
Z1 = np.dot(W,X1) + b
A1 = 1/(1+np.exp(-Z1))
print("Forward Propagation: Minibatch1")
print("Z1: {}".format(Z1))
print("A1: {}".format(A1))

# Back Propagation
# Number of samples (columns) in this mini-batch
m1 = X1.shape[1]
# Derivative of the binary cross-entropy loss w.r.t. A, averaged over m1 samples
grad_A_L1 = -(Y1/A1 - (1-Y1)/(1-A1))/m1
# Sigmoid derivative: A - A^2 = A*(1 - A)
dA1dZ1 = A1 - np.square(A1)
# Chain rule: dL/dZ = dL/dA * dA/dZ (element-wise)
grad_Z_L1 = grad_A_L1*dA1dZ1
grad_W_L = np.dot(grad_Z_L1,X1.T)
grad_b_L = np.sum(grad_Z_L1,keepdims=True)
print("Back Propagation: Minibatch1")
print("grad_A_L1: {}".format(grad_A_L1))
print("dA1dZ1: {}".format(dA1dZ1))
print("grad_Z_L1: {}".format(grad_Z_L1))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b; these are the final parameters after one epoch
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))

Forward Propagation: Minibatch0
Z0: [[ 0.1 -0.1 -0.2]]
A0: [[0.52497919 0.47502081 0.450166  ]]
Back Propagation: Minibatch0
grad_A_L0: [[ 0.70172364 -0.70172364  0.60624358]]
dA0dZ0: [[0.24937604 0.24937604 0.24751657]]
grad_Z_L0: [[ 0.17499306 -0.17499306  0.15005533]]
grad_W_L: [[ 0.42522827 -0.67546349]]
grad_b_L: [[0.15005533]]
W: [[0.05747717 0.16754635]]
b: [[0.18499447]]
Forward Propagation: Minibatch1
Z1: [[-0.09262106  0.46260999]]
A1: [[0.47686127 0.61363316]]
Back Propagation: Minibatch1
grad_A_L1: [[-1.04852297 -0.81481907]]
dA1dZ1: [[0.2494646  0.23708751]]
grad_Z_L1: [[-0.26156936 -0.19318342]]
grad_W_L: [[-0.06838594  0.13677188]]
grad_b_L: [[-0.45475278]]
W: [[0.06431577 0.15386916]]
b: [[0.23046975]]


## Switched Order:

In [4]:
# One epoch of mini-batch gradient descent in the switched order:
# mini-batch 1 first, then mini-batch 0. The starting parameters are reset to
# the same initial values used for the original order, so any difference in
# the final W and b comes only from the order of the mini-batches.

# initial parameters
W = np.array([[0.1,0.1]])
b = np.array([[0.2]])

# Epoch 1, Mini-batch 1
# Forward propagation: linear step followed by the sigmoid activation
Z1 = np.dot(W,X1) + b
A1 = 1/(1+np.exp(-Z1))
print("Forward Propagation: Minibatch1")
print("Z1: {}".format(Z1))
print("A1: {}".format(A1))

# Back Propagation
# Number of samples (columns) in this mini-batch; derived from the data
# instead of being hard-coded so the averaging always matches the batch.
m1 = X1.shape[1]
# Derivative of the binary cross-entropy loss w.r.t. A, averaged over m1 samples
grad_A_L1 = -(Y1/A1 - (1-Y1)/(1-A1))/m1
# Sigmoid derivative: A - A^2 = A*(1 - A)
dA1dZ1 = A1 - np.square(A1)
# Chain rule: dL/dZ = dL/dA * dA/dZ (element-wise)
grad_Z_L1 = grad_A_L1*dA1dZ1
grad_W_L = np.dot(grad_Z_L1,X1.T)
# keepdims=True keeps the bias gradient 2-D, like b
grad_b_L = np.sum(grad_Z_L1,keepdims=True)
print("Back Propagation: Minibatch1")
print("grad_A_L1: {}".format(grad_A_L1))
print("dA1dZ1: {}".format(dA1dZ1))
print("grad_Z_L1: {}".format(grad_Z_L1))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b (gradient descent step with learning rate alpha)
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))

# Epoch 1, Mini-batch 0
# Forward propagation, using the W and b just updated by mini-batch 1
Z0 = np.dot(W,X0) + b
A0 = 1/(1+np.exp(-Z0))
print("Forward Propagation: Minibatch0")
print("Z0: {}".format(Z0))
print("A0: {}".format(A0))

# Back Propagation
# Number of samples (columns) in this mini-batch
m0 = X0.shape[1]
# Derivative of the binary cross-entropy loss w.r.t. A, averaged over m0 samples
grad_A_L0 = -(Y0/A0 - (1-Y0)/(1-A0))/m0
# Sigmoid derivative: A - A^2 = A*(1 - A)
dA0dZ0 = A0 - np.square(A0)
# Chain rule: dL/dZ = dL/dA * dA/dZ (element-wise)
grad_Z_L0 = grad_A_L0*dA0dZ0
grad_W_L = np.dot(grad_Z_L0,X0.T)
grad_b_L = np.sum(grad_Z_L0,keepdims=True)
print("Back Propagation: Minibatch0")
print("grad_A_L0: {}".format(grad_A_L0))
print("dA0dZ0: {}".format(dA0dZ0))
print("grad_Z_L0: {}".format(grad_Z_L0))
print("grad_W_L: {}".format(grad_W_L))
print("grad_b_L: {}".format(grad_b_L))

# Update W and b; these are the final parameters after one epoch
W = W - alpha*grad_W_L
b = b - alpha*grad_b_L
print("W: {}".format(W))
print("b: {}".format(b))

Forward Propagation: Minibatch1
Z1: [[0.1 0.3]]
A1: [[0.52497919 0.57444252]]
Back Propagation: Minibatch1
grad_A_L1: [[-0.95241871 -0.87040911]]
dA1dZ1: [[0.24937604 0.24445831]]
grad_Z_L1: [[-0.23751041 -0.21277874]]
grad_W_L: [[-0.02473166  0.04946333]]
grad_b_L: [[-0.45028915]]
W: [[0.10247317 0.09505367]]
b: [[0.24502891]]
Forward Propagation: Minibatch0
Z0: [[ 0.15739475 -0.02529309 -0.10550776]]
A0: [[0.53926766 0.49367707 0.4736475 ]]
Back Propagation: Minibatch0
grad_A_L0: [[ 0.72348585 -0.67520522  0.63328916]]
dA0dZ0: [[0.24845805 0.24996002 0.24930555]]
grad_Z_L0: [[ 0.17975589 -0.16877431  0.1578825 ]]
grad_W_L: [[ 0.47373727 -0.77870022]]
grad_b_L: [[0.16886407]]
W: [[0.05509944 0.17292369]]
b: [[0.22814251]]
