In [0]:
import numpy as np

In [0]:
def sigmoid(input):
  """Logistic function 1 / (1 + exp(-input)), applied element-wise.

  Works on scalars as well as NumPy arrays, since it only relies on
  negation, np.exp and arithmetic broadcasting.
  """
  decay = np.exp(-input)
  return 1 / (1 + decay)


def relu(input):
  """Rectified linear unit: element-wise max(input, 0).

  np.maximum is used instead of multiplying by the boolean mask
  (input > 0): the mask product evaluates -inf * 0, which is nan, and
  yields -0.0 for negative floats. np.maximum returns a clean 0 in both
  cases and keeps the dtype of array inputs.
  """
  return np.maximum(input, 0)


def initialize_parameters(n_x, n_y, seed=20181001):
  """Create the weight matrix and bias row of a single dense layer.

  Args:
    n_x: number of input features.
    n_y: number of output units.
    seed: value handed to np.random.seed before sampling so that runs are
      reproducible. Pass None to leave NumPy's global random state as is.

  Returns:
    dict with
      "W": array of shape (n_x, n_y) drawn from a standard normal
           distribution (np.random.randn),
      "b": array of shape (1, n_y) filled with zeros.
  """
  # Seed for convenience/reproducibility. Note that this resets NumPy's
  # global random state, which also affects later np.random calls.
  if seed is not None:
    np.random.seed(seed)

  # W.shape = (n_x, n_y)
  # b.shape = (1, n_y)
  W = np.random.randn(n_x, n_y)
  b = np.zeros([1, n_y])

  # A dictionary is used for convenience so the parameters travel together.
  parameters = {"W": W,
                "b": b}

  return parameters


def forward_propagation(X, parameters, activation="sigmoid"):
  """Compute the layer output A = activation(X . W + b).

  Args:
    X: input array of shape (m, n_x).
    parameters: dict holding "W" of shape (n_x, n_y) and "b" of shape
      (1, n_y).
    activation: name of the activation to apply, "sigmoid" or "relu".

  Returns:
    A, the activated output of shape (m, n_y).

  Raises:
    ValueError: if `activation` is not one of the supported names.
  """
  # Validate up front so a typo fails with a clear message instead of an
  # UnboundLocalError on `A` at the return statement.
  if activation not in ("sigmoid", "relu"):
    raise ValueError(
        "Unsupported activation %r; expected 'sigmoid' or 'relu'" % (activation,))

  # Z.shape = (m, n_y) = (m, n_x) . (n_x, n_y); b is broadcast over the m rows.
  Z = np.dot(X, parameters["W"]) + parameters["b"]
  if activation == "sigmoid":
    A = sigmoid(Z)
  else:
    A = relu(Z)

  return A


def compute_loss(Y_hat, Y):
  """Binary cross-entropy between predictions and labels.

  Args:
    Y_hat: predicted probabilities, same shape as Y.
    Y: ground-truth labels (0 or 1).

  Returns:
    The cross-entropy summed over every entry and divided by len(Y),
    i.e. by the size of the first axis.
  """
  # This is binary cross-entropy, not MSE.
  # Clip the predictions so np.log never receives exactly 0; otherwise a
  # saturated prediction produces inf, or nan via 0 * log(0).
  eps = 1e-15
  P = np.clip(np.asarray(Y_hat, dtype=float), eps, 1 - eps)
  return -np.sum(Y*np.log(P) + (1-Y) * np.log(1-P)) / len(Y)

  
def backward_propagation(X, Y, A):
  """Gradients of the cross-entropy loss for a sigmoid output layer.

  By the chain rule:
    dL/dW = dL/dA * dA/dZ * dZ/dW
    dL/db = dL/dA * dA/dZ * dZ/db
  For a sigmoid activation combined with binary cross-entropy the product
  dL/dA * dA/dZ simplifies to (A - Y); this shortcut does not hold for
  other activation/loss pairs such as relu.

  Args:
    X: inputs of shape (m, n_x).
    Y: labels of shape (m, n_y).
    A: layer outputs (predictions) of shape (m, n_y).

  Returns:
    dict with
      "dW": array of shape (n_y, n_x), i.e. transposed relative to W,
      "db": array of shape (1, n_y).
    Both are summed over the m samples; no 1/m factor is applied here.
  """
  dL_dZ = A - Y

  dZ_dW = X

  dL_dW = np.dot(dL_dZ.T, dZ_dW)
  # dZ/db = 1, so the bias gradient is dL/dZ summed over the sample axis
  # only. Keeping one entry per output unit matters when n_y > 1; a plain
  # np.sum would also collapse the output-unit axis into a single scalar.
  dL_db = np.sum(dL_dZ, axis=0, keepdims=True)

  grads = {"dW": dL_dW,
           "db": dL_db}
  return grads


def update_parameters(parameters, grads, learning_rate):
  """Apply one gradient-descent step to the parameters in place.

  grads["dW"] is transposed before it is subtracted from parameters["W"];
  grads["db"] is subtracted from parameters["b"] as is. The same
  `parameters` dict is returned after being modified.
  """
  weight_step = learning_rate * grads["dW"].T
  parameters["W"] -= weight_step

  bias_step = learning_rate * grads["db"]
  parameters["b"] -= bias_step

  return parameters

In [3]:
# Toy dataset: 4 samples with 2 features each and one binary label per sample,
# so X.shape = (4, 2) and Y.shape = (4, 1).
X = np.array([
    [1, 2],
    [3, 4],
    [2, 1],
    [4, 3],
])
Y = np.array([0, 1, 0, 1]).reshape(-1, 1)
print(X)
print(Y)

[[1 2]
 [3 4]
 [2 1]
 [4 3]]
[[0]
 [1]
 [0]
 [1]]


In [4]:
# Hyperparameters
num_epochs = 2000
learning_rate = 1e-2
log_every = 100  # the loss is reported on epoch 1 and on every multiple of this

# Step 1: build the initial weights and bias from the data dimensions.
parameters = initialize_parameters(n_x=X.shape[1], n_y=Y.shape[1])

# Step 2: run num_epochs rounds of forward pass -> loss -> gradients -> update.
for epoch in range(num_epochs):
  Y_hat = forward_propagation(X, parameters, activation="sigmoid")
  loss = compute_loss(Y_hat, Y)
  grads = backward_propagation(X, Y, Y_hat)
  parameters = update_parameters(parameters, grads, learning_rate)

  # The reported loss was measured before this epoch's parameter update.
  epoch_number = epoch + 1
  if epoch_number == 1 or epoch_number % log_every == 0:
    print(epoch_number, loss)


1 1.5869227159398303
100 0.581662012314877
200 0.47549562861905287
300 0.39976984374493846
400 0.34331937110261607
500 0.2996316141159108
600 0.264868388919475
700 0.23662596041180517
800 0.21330256619806728
900 0.1937781760092836
1000 0.17724195465943854
1100 0.1630918218077932
1200 0.15087187497393395
1300 0.14023133582770833
1400 0.13089658839619606
1500 0.12265160319807941
1600 0.11532394033452233
1700 0.10877456954131144
1800 0.10289035847743302
1900 0.09757845889039915
2000 0.09276206266263975


In [5]:
# Show the labels followed by the predictions of the most recent forward pass.
print(Y, Y_hat, sep="\n")

[[0]
 [1]
 [0]
 [1]]
[[0.12028496]
 [0.94445093]
 [0.12098505]
 [0.94479615]]
