In [1]:
import numpy as np

In [2]:
# inputs
# Toy data set and initial parameters for a 2-unit hidden layer / 1-unit output network.
# Columns of X and Y are presumably the individual samples (3 of them) -- the later
# cells sum bias gradients over axis=1, which is consistent with that layout.
X = np.array([
    [1, 2, 4],
    [-2, -5, -8],
])                                   # shape (2, 3)
Y = np.array([[0, 1, 0]])            # shape (1, 3)

# layer-1 parameters
W1 = np.array([
    [0.5, 0.5],
    [0.5, -0.5],
])                                   # shape (2, 2)
b1 = np.array([[0.5], [0.5]])        # shape (2, 1), column vector

# layer-2 parameters
W2 = np.array([[-1, 1]])             # shape (1, 2); integer dtype (built from int literals)
b2 = np.array([[-0.1]])              # shape (1, 1)

In [3]:
# forward propagation
print("FORWARD PROPAGATION EXAMPLE")
# layer 1: linear step W1.X + b1 (the (2, 1) bias broadcasts over the columns of X),
# followed by an element-wise tanh activation.
Z1 = W1.dot(X) + b1
A1 = np.tanh(Z1)
print("Z1: \n{}".format(Z1))
print("A1: \n{}".format(A1))

FORWARD PROPAGATION EXAMPLE
Z1: 
[[ 0.  -1.  -1.5]
 [ 2.   4.   6.5]]
A1: 
[[ 0.         -0.76159416 -0.90514825]
 [ 0.96402758  0.9993293   0.99999548]]


In [4]:
# layer 2: linear step W2.A1 + b2, followed by the logistic sigmoid 1 / (1 + e^{-z}).
Z2 = W2.dot(A1) + b2
A2 = 1 / (np.exp(-Z2) + 1)
print("Z2: {}".format(Z2))
print("A2: {}".format(A2))

Z2: [[0.86402758 1.66092346 1.80514373]]
A2: [[0.70350144 0.84036193 0.85877393]]


In [5]:
# back propagation
# Derivative of the loss with respect to the output activation A2.
# The expression below is the gradient of the averaged binary cross-entropy
#   L = -(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2))
# i.e. dL/dA2 = -(1/m) * (Y/A2 - (1-Y)/(1-A2)).
print("BACK PROPAGATION EXAMPLE")
# Averaging factor taken from the data instead of a hard-coded 3, so the cell
# stays correct if the number of columns in Y changes (it is 3 for the Y above).
num_samples = Y.shape[1]
dLdA2 = -1/num_samples*(Y/A2 - (1-Y)/(1-A2))
print("dLdA2: {}".format(dLdA2))

BACK PROPAGATION EXAMPLE
dLdA2: [[ 1.12423257 -0.39665449  2.3602818 ]]


In [6]:
# layer 2
# Local derivative of the sigmoid output, written as A2 - A2^2 (= A2 * (1 - A2)).
dA2dZ2 = A2 - A2 * A2
print("dA2dZ2: {}".format(dA2dZ2))

# Chain rule (element-wise): dL/dZ2 = dL/dA2 * dA2/dZ2.
dLdZ2 = np.multiply(dLdA2, dA2dZ2)
print("dLdZ2: {}".format(dLdZ2))

# Parameter gradients: the weight gradient contracts over the columns via A1^T,
# the bias gradient sums dL/dZ2 along axis 1 and keeps the column shape.
grad_W2L = dLdZ2.dot(A1.T)
grad_b2L = dLdZ2.sum(axis=1, keepdims=True)
print("grad_W2L: {}".format(grad_W2L))
print("grad_b2L: {}".format(grad_b2L))

dA2dZ2: [[0.20858716 0.13415376 0.12128127]]
dLdZ2: [[ 0.23450048 -0.05321269  0.28625798]]
grad_W2L: [[-0.21857943  0.45914461]]
grad_b2L: [[0.46754577]]


In [7]:
# layer 1
# Push the layer-2 pre-activation gradient back through W2 to the hidden activations.
dLdA1 = W2.T.dot(dLdZ2)
print("dLdA1: \n{}".format(dLdA1))

# Local derivative of tanh: 1 - tanh(z)^2, expressed through the stored activation A1.
dA1dZ1 = 1 - A1 * A1
print("dA1dZ1: \n{}".format(dA1dZ1))

# Chain rule (element-wise): dL/dZ1 = dL/dA1 * dA1/dZ1.
dLdZ1 = np.multiply(dLdA1, dA1dZ1)
print("dLdZ1: \n{}".format(dLdZ1))

# Parameter gradients: weight gradient via X^T, bias gradient summed along axis 1
# with the column shape preserved.
grad_W1L = dLdZ1.dot(X.T)
grad_b1L = dLdZ1.sum(axis=1, keepdims=True)
print("grad_W1L: \n{}".format(grad_W1L))
print("grad_b1L: \n{}".format(grad_b1L))

dLdA1: 
[[-0.23450048  0.05321269 -0.28625798]
 [ 0.23450048 -0.05321269  0.28625798]]
dA1dZ1: 
[[1.00000000e+00 4.19974342e-01 1.80706639e-01]
 [7.06508249e-02 1.34095068e-03 9.04127676e-06]]
dLdZ1: 
[[-2.34500481e-01  2.23479649e-02 -5.17287166e-02]
 [ 1.65676524e-02 -7.13555943e-05  2.58813758e-06]]
grad_W1L: 
[[-0.39671942  0.77109087]
 [ 0.01643529 -0.03279923]]
grad_b1L: 
[[-0.26388123]
 [ 0.01649888]]


In [8]:
# One gradient-descent step: parameter <- parameter - alpha * gradient.
alpha = 0.1  # learning rate

# The subtraction is deliberately out-of-place (`W = W - ...`, not `W -= ...`):
# W2 was created from integer literals, so an in-place update with a float
# gradient would fail, whereas rebinding yields a new float array.
# NOTE: this cell reads and rebinds W1/b1/W2/b2, so re-running it applies
# another step with the same gradients.
W1, b1 = W1 - alpha * grad_W1L, b1 - alpha * grad_b1L
W2, b2 = W2 - alpha * grad_W2L, b2 - alpha * grad_b2L

print("W1 update: \n{}".format(W1))
print("b1 update: \n{}".format(b1))
print("W2 update: {}".format(W2))
print("b2 update: {}".format(b2))

W1 update: 
[[ 0.53967194  0.42289091]
 [ 0.49835647 -0.49672008]]
b1 update: 
[[0.52638812]
 [0.49835011]]
W2 update: [[-0.97814206  0.95408554]]
b2 update: [[-0.14675458]]


In [9]:
print("PREDICTION EXAMPLE")
# inputs
# Self-contained example: the data and the *initial* parameters are declared again
# here, with the same values as in the inputs cell at the top of the notebook.
# NOTE(review): this overwrites the W1/b1/W2/b2 produced by the gradient step in
# the previous cell, so the prediction below does not reflect that update --
# confirm this is intended.
X = np.array([
    [1, 2, 4],
    [-2, -5, -8],
])
Y = np.array([[0, 1, 0]])
W1 = np.array([
    [0.5, 0.5],
    [0.5, -0.5],
])
b1 = np.array([[0.5], [0.5]])
W2 = np.array([[-1, 1]])
b2 = np.array([[-0.1]])

# layer 1: linear step followed by tanh
Z1 = W1.dot(X) + b1
A1 = np.tanh(Z1)
print("Z1: {}".format(Z1))
print("A1: {}".format(A1))

# layer 2: linear step followed by the logistic sigmoid
Z2 = W2.dot(A1) + b2
A2 = 1 / (np.exp(-Z2) + 1)
print("Z2: {}".format(Z2))
print("A2: {}".format(A2))

# prediction: round the sigmoid output to the nearest of 0 / 1
P = np.round(A2)
print("Prediction (round A2): {}".format(P))

PREDICTION EXAMPLE
Z1: [[ 0.  -1.  -1.5]
 [ 2.   4.   6.5]]
A1: [[ 0.         -0.76159416 -0.90514825]
 [ 0.96402758  0.9993293   0.99999548]]
Z2: [[0.86402758 1.66092346 1.80514373]]
A2: [[0.70350144 0.84036193 0.85877393]]
Prediction (round A2): [[1. 1. 1.]]
