In [0]:
import numpy as np

In [0]:
# Seed NumPy's global RNG so that every random draw below is reproducible.
np.random.seed(seed=95)

In [0]:
# m: number of examples, K: number of classes, alpha: gradient-descent step size.
m, K, alpha = 6, 4, 0.01

In [0]:
# Random inputs in [0, 1): one column per example (m columns), 3 rows.
X = np.random.random_sample(size=(3, m))

In [0]:
# Draw one integer class label in [0, K) for each of the m examples.
labels = np.random.randint(0, K, size=m)

In [0]:
# Display the sampled integer class labels.
labels

array([2, 1, 3, 0, 0, 3])

In [0]:
# One-hot encode the labels: Y has one row per class and one column per
# example, and column i holds a single 1 in row labels[i].
# The shape comes from K and m rather than literals, so it stays valid when
# the number of classes or examples is changed.
Y = np.zeros((K, m))
Y[labels, np.arange(m)] = 1

In [0]:
# Echo the inputs, the integer labels, and the one-hot targets built from them.
for _title, _value in (("X: \n", X), ("Labels: ", labels), ("Y: \n", Y)):
    print(_title, _value)

X: 
 [[0.22880349 0.19068802 0.88635967 0.7189259  0.53298338 0.8694621 ]
 [0.72423768 0.48208699 0.7560772  0.97473999 0.5083671  0.95849135]
 [0.49426336 0.51716733 0.34406231 0.96975023 0.25608847 0.40327522]]
Labels:  [2 1 3 0 0 3]
Y: 
 [[0. 0. 0. 1. 1. 0.]
 [0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 1.]]


## Initialize Weights

In [0]:
# Width of every layer; the leading 3 is the input dimension and the
# remaining entries are the unit counts of the successive layers.
units = [3, 5, 5, 4]

# Number of weight layers (the input layer is not counted).
L = len(units) - 1

# Maps 'W<l>' / 'b<l>' to the weight matrix / bias column of layer l.
parameters = {}

for layer, (fan_in, fan_out) in enumerate(zip(units[:-1], units[1:]), start=1):
    # Weights are uniform in [0, 1) with shape (units in this layer, units in
    # the previous layer); biases start at zero as a column vector.
    parameters['W' + str(layer)] = np.random.rand(fan_out, fan_in)
    parameters['b' + str(layer)] = np.zeros((fan_out, 1))

## Forward Pass

In [0]:
def sigmoid(X):
    """Element-wise logistic function 1 / (1 + exp(-X)).

    Args:
        X: Scalar or NumPy array of pre-activations.

    Returns:
        Values in [0, 1], with the same shape as ``X``.
    """
    exp_neg = np.exp(-X)
    return 1 / (1 + exp_neg)

def inv_sigmoid(X):
    """Element-wise derivative of the sigmoid, s(X) * (1 - s(X)).

    Despite the name, this is the derivative of ``sigmoid`` evaluated at
    ``X``, not its inverse (logit) function.

    Args:
        X: Scalar or NumPy array of pre-activations.

    Returns:
        The derivative values, with the same shape as ``X``.
    """
    s = sigmoid(X)
    return s * (1 - s)

In [0]:
# Shared store for the intermediate values of the forward pass ('a<l>', 'Z<l>');
# the backward pass adds its gradients to the same dictionary.
cache = {}


def forward_pass():
    """Propagate the global ``X`` through every layer and record the results.

    For each layer l = 1..L this stores the pre-activation
    ``Z<l> = W<l> . a<l-1> + b<l>`` and the activation ``a<l> = sigmoid(Z<l>)``
    in the global ``cache``; ``a0`` is the input ``X`` itself.
    Nothing is returned.
    """
    activation = X
    cache['a0'] = activation

    for layer in range(1, L + 1):
        tag = str(layer)
        pre_activation = parameters['W' + tag].dot(activation) + parameters['b' + tag]
        activation = sigmoid(pre_activation)
        cache['Z' + tag] = pre_activation
        cache['a' + tag] = activation

In [0]:
# Run one forward pass so that `cache` holds every layer's Z and activation.
forward_pass()

In [0]:
# Activations of the last layer (L == len(units) - 1): one column per example.
cache['a' + str(L)]

array([[0.94615211, 0.94420533, 0.94877725, 0.9511218 , 0.9450622 ,
        0.95004851],
       [0.86897625, 0.86611912, 0.87258037, 0.87645306, 0.86707169,
        0.87455708],
       [0.90019283, 0.89765207, 0.903727  , 0.90706904, 0.89870011,
        0.90545932],
       [0.79107053, 0.78816353, 0.79441974, 0.79880996, 0.78879027,
        0.79652313]])

## Cost

In [0]:
def cost(y, y_hat, eps=1e-15):
    """Binary cross-entropy between targets and predictions, summed over all entries.

    Predictions are clipped to ``[eps, 1 - eps]`` before the logarithms are
    taken, so a prediction that is exactly 0 or 1 yields a finite cost
    instead of ``nan``/``inf``. Predictions already inside that interval are
    left unchanged.

    Args:
        y: Array of targets with the same shape as ``y_hat``.
        y_hat: Array of predicted probabilities.
        eps: Clipping margin that keeps the logarithms finite.

    Returns:
        The cross-entropy summed (not averaged) over every entry.
    """
    p = np.clip(y_hat, eps, 1 - eps)
    return -np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

## Backward Pass

In [0]:
def back_prop():
    """Back-propagate the output error and store the gradients in ``cache``.

    Reads the activations ('a<l>') and pre-activations ('Z<l>') written by
    ``forward_pass`` and the current weights in ``parameters``. For every
    layer l = L..1 it writes:

    * ``dZ<l>`` - the error term of the layer,
    * ``dW<l>`` - ``dZ<l> . a<l-1>^T``,
    * ``db<l>`` - ``dZ<l>`` summed over the example axis (kept as a column).

    Nothing is returned.
    """
    # Output layer: error term is (prediction - target), scaled by 1/m.
    delta = (1 / m) * (cache['a' + str(L)] - Y)

    for layer in range(L, 0, -1):
        if layer < L:
            # Hidden layer: pull the error back through the next layer's
            # weights and multiply by the sigmoid derivative at this layer.
            upstream = np.dot(parameters['W' + str(layer + 1)].T, delta)
            delta = upstream * inv_sigmoid(cache['Z' + str(layer)])

        tag = str(layer)
        cache['dZ' + tag] = delta
        cache['dW' + tag] = np.dot(delta, cache['a' + str(layer - 1)].T)
        cache['db' + tag] = np.sum(delta, axis=1, keepdims=True)

In [0]:
def update_weights():
    """Take one gradient-descent step on every weight matrix and bias.

    Each ``parameters['W<l>']`` / ``parameters['b<l>']`` is replaced by its
    value minus ``alpha`` times the matching gradient ``cache['dW<l>']`` /
    ``cache['db<l>']``. A new array is bound for each entry rather than
    modifying the old one in place. Nothing is returned.
    """
    for layer in range(1, L + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(layer)
            parameters[key] = parameters[key] - alpha * cache['d' + key]


In [0]:
# Number of full-batch gradient-descent iterations.
epoch = 1000

for i in range(epoch):
    # The cost is reported for the current weights, before they are updated.
    forward_pass()
    current_cost = cost(Y, cache['a' + str(L)])
    print(current_cost)

    # Compute the gradients, then take one step.
    back_prop()
    update_weights()

40.7870593868397
40.307651819256336
39.83394803834906
39.36597275884288
38.90374822431285
38.447294159730355
37.996627728724874
37.55176349579094
37.11271339364909
36.67948669594662
36.252089995458924
35.83052718792625
35.41479946163209
35.00490529280103
34.60084044686328
34.20259798560309
33.81016828017681
33.42353902995593
33.04269528711955
32.667619486890565
32.29829148328129
31.934688590185374
31.57678562762726
31.224554972954714
30.877966616737474
30.53698822311379
30.201585194308144
29.871720739026628
29.547355944423185
29.228449851317784
28.914959532339303
28.606840172659258
28.304045152978937
28.006526134431045
27.71423314505822
27.427114667534013
27.14511772779757
26.868187984280716
26.596269817415667
26.329306419122513
26.067239881988307
25.810011287863443
25.55756079561593
25.30982772780019
25.066750656013433
24.828267484730134
24.594315533422375
24.364831616791665
24.139752122955652
23.919013089450637
23.702550276928328
23.4902992404421
23.282195398234716
23.078174097955227