In [1]:
import numpy as np

In [2]:
# Softmax Example
Z = np.array([[0.1,-0.1,-0.2],[-0.2, 0.2, 0.3],[-0.3,0.1,0.2],[0.4,-0.3,-0.5]])
Zexp = np.exp(Z)
Sum = np.sum(Zexp,axis=0,keepdims=True)
A = Zexp/Sum
print("Z: \n{}".format(Z))
print("A: \n{}".format(A))
# sum in row direction (down each column)
print("Sum of A in row direction: {}".format(np.sum(A,axis=0)))

Z: 
[[ 0.1 -0.1 -0.2]
 [-0.2  0.2  0.3]
 [-0.3  0.1  0.2]
 [ 0.4 -0.3 -0.5]]
A: 
[[0.26588694 0.22779083 0.20486076]
 [0.19697389 0.30748546 0.3377583 ]
 [0.17822935 0.27822435 0.30561635]
 [0.35890983 0.18649936 0.15176459]]
Sum of A in row direction: [1. 1. 1.]


In [3]:
# forward propagation
#training data
X = np.array([[1,2,4],[-2,-5,-8]])
Y = np.array([[0,1,2]])
# parameters
W1 = np.array([[0.5,0.5],[0.5,-0.5]])
b1 = np.array([[0.5],[0.5]])
W2 = np.array([[-1,1],[1,-1],[-2,1]])
b2 = np.array([[-0.1]])

In [4]:
# layer 1
Z1 = np.dot(W1,X) + b1
print("Z1: \n{}".format(Z1))
A1 = np.tanh(Z1)
print("A1: \n{}".format(A1))
# layer 2
Z2 = np.dot(W2,A1) + b2
print("Z2: \n{}".format(Z2))
Z2exp = np.exp(Z2)
Sum = np.sum(Z2exp,axis=0,keepdims=True)
A2 = Z2exp/Sum
print("A2: \n{}".format(A2))

Z1: 
[[ 0.  -1.  -1.5]
 [ 2.   4.   6.5]]
A1: 
[[ 0.         -0.76159416 -0.90514825]
 [ 0.96402758  0.9993293   0.99999548]]
Z2: 
[[ 0.86402758  1.66092346  1.80514373]
 [-1.06402758 -1.86092346 -2.00514373]
 [ 0.86402758  2.42251761  2.71029199]]
A2: 
[[0.46610686 0.31533481 0.28616887]
 [0.06778628 0.00931651 0.0063363 ]
 [0.46610686 0.67534868 0.70749484]]


In [5]:
def onehot(Y,nclass):
    ndata = Y.shape[1]
    Y_onehot = np.zeros((nclass,ndata))
    for count in range(ndata):
        Y_onehot[int(Y[0,count]),count] = 1.0
    return Y_onehot

In [6]:
print("BACK PROPAGATION")
# dLoss/dA2
Yonehot = onehot(Y,3)
print("Yonehot: \n{}".format(Yonehot))
dLossdA2 = -Yonehot/A2/3
print("dLossdA2: {}".format(dLossdA2))

BACK PROPAGATION
Yonehot: 
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
dLossdA2: [[ -0.71514359  -0.          -0.        ]
 [ -0.         -35.77877846  -0.        ]
 [ -0.          -0.          -0.47114596]]


In [7]:
# LAYER 2
# dLoss/dZ2
prod2 = A2*dLossdA2
print("A2*dLossdA: {}".format(prod2))
sumterm = np.sum(prod2,axis=0)
print("sumterm: {}".format(sumterm))
sumprod = A2*sumterm
print("sumprod: {}".format(sumprod))
dLossdZ2 = prod2 - A2*sumprod
print("dLossdZ2: {}".format(dLossdZ2))
dLossdW2 = np.dot(dLossdZ2,A1.T)
dLossdb2 = np.sum(dLossdZ2,axis=1)
dLossdA1 = np.dot(W2.T,dLossdZ2)
print("dLossdW2: {}".format(dLossdW2))
print("dLossdb2: {}".format(dLossdb2))
print("dLossdA1: {}".format(dLossdA1))

A2*dLossdA: [[-0.33333333 -0.         -0.        ]
 [-0.         -0.33333333 -0.        ]
 [-0.         -0.         -0.33333333]]
sumterm: [-0.33333333 -0.33333333 -0.33333333]
sumprod: [[-0.15536895 -0.1051116  -0.09538962]
 [-0.02259543 -0.0031055  -0.0021121 ]
 [-0.15536895 -0.22511623 -0.23583161]]
dLossdZ2: [[-2.60914798e-01  3.31453475e-02  2.72975400e-02]
 [ 1.53165977e-03 -3.33304401e-01  1.33828888e-05]
 [ 7.24185355e-02  1.52031947e-01 -1.66483685e-01]]
dLossdW2: [[-0.04995162 -0.19110853]
 [ 0.25383057 -0.33159091]
 [ 0.03490578  0.05526051]]
dLossdb2: [-0.20047191 -0.33175936  0.0579668 ]
dLossdA1: [[ 0.11760939 -0.67051364  0.30568321]
 [-0.19002792  0.51848169 -0.13919953]]


In [8]:
# LAYER 1
dA1dZ1 = 1-A1*A1
print("dA1/dZ1: {}".format(dA1dZ1))
dLossdZ1 = dLossdA1*dA1dZ1
print("dLossdZ1: {}".format(dLossdZ1))
dLossdW1 = np.dot(dLossdZ1,X.T)
dLossdb1 = np.sum(dLossdZ1,axis=1,keepdims=True)
print("dLossdW1: {}".format(dLossdW1))
print("dLossdb1: {}".format(dLossdb1))

dA1/dZ1: [[1.00000000e+00 4.19974342e-01 1.80706639e-01]
 [7.06508249e-02 1.34095068e-03 9.04127676e-06]]
dLossdZ1: [[ 1.17609387e-01 -2.81598525e-01  5.52389862e-02]
 [-1.34256294e-02  6.95258383e-04 -1.25854146e-06]]
dLossdW1: [[-0.22463172  0.73086196]
 [-0.01204015  0.02338504]]
dLossdb1: [[-0.10875015]
 [-0.01273163]]


In [9]:
# Prediction Example
print("FORWARD PROPAGATION")
# layer 1
Z1 = np.dot(W1,X) + b1
print("Z1: {}".format(Z1))
A1 = np.tanh(Z1)
print("A1: {}".format(A1))
# layer 2
Z2 = np.dot(W2,A1) + b2
print("Z2: {}".format(Z2))
Z2exp = np.exp(Z2)
A2 = Z2exp/np.sum(Z2exp,axis=0,keepdims=True)
print("A2: {}".format(A2))
# prediction
P = np.argmax(A2,axis=0)
print("P: {}".format(P))

FORWARD PROPAGATION
Z1: [[ 0.  -1.  -1.5]
 [ 2.   4.   6.5]]
A1: [[ 0.         -0.76159416 -0.90514825]
 [ 0.96402758  0.9993293   0.99999548]]
Z2: [[ 0.86402758  1.66092346  1.80514373]
 [-1.06402758 -1.86092346 -2.00514373]
 [ 0.86402758  2.42251761  2.71029199]]
A2: [[0.46610686 0.31533481 0.28616887]
 [0.06778628 0.00931651 0.0063363 ]
 [0.46610686 0.67534868 0.70749484]]
P: [0 2 2]
