## 과제 1
ReLu activation function과 derivative function을 구현해보세요
- Hint : np.maximum 함수 사용하면 편리합니다
- 다른 방법 사용하셔도 무방합니다


In [54]:
import numpy as np

In [17]:
def relu(x):
    for i in range(0, len(x)):
        x[i] = max(0, x[i])
    
    return x

In [18]:
relu([-2, -3, 0.5, 10, -4])

[0, 0, 0.5, 10, 0]

In [19]:
def d_relu(x):
    for i in range(0, len(x)):
        if x[i] > 0:
            x[i] = 1
        else:
            x[i] = 0
    
    return x

In [20]:
d_relu([-2, -3, 0.5, 10, -4])

[0, 0, 1, 1, 0]

## 과제 2
Deep Learning Basic 코드 파일의 MLP implementation with Numpy library using MNIST dataset 코드 참고해서
Three layer MLP 일 때의 backward_pass 함수를 완성해주세요.   
- Hint : 코드 파일의 예시는 Two layer MLP


In [8]:
def backward_pass(x, y_true, params):

  dS3 = params["A3"] - y_true

  grads = {}

  grads["dW3"] =  np.dot(dS3, params["A2"].T)/x.shape[1]
  grads["db3"] =  (1/x.shape[1])*np.sum(dS3, axis=1, keepdims=True)/x.shape[1]

  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * d_sigmoid(params["S2"])

  grads["dW2"] =  np.dot(dS2, params["A1"].T)/x.shape[1]
  grads["db2"] =  (1/x.shape[1])*np.sum(dS2, axis=1, keepdims=True)/x.shape[1]

  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * d_sigmoid(params["S1"])

  grads["dW1"] = np.dot(dS1, x.T)/x.shape[1]
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True)/x.shape[1]

  return grads

## 과제 3
Deep Learning Basic 코드 파일의 MLP implementation with Pytorch library using MNIST dataset 코드 참고해서
Three layer MLP를 구한후, 학습을 돌려 보세요

hyperparameter는 다음과 같이 설정

- epochs : 100
- hiddensize : 128, 64 (two layer)
- learning_rate : 0.5

In [1]:
# Assignment 3 구현은 여기서

from IPython import get_ipython
get_ipython().magic('reset -sf')

import numpy as np
import sklearn.datasets

mnist = sklearn.datasets.fetch_openml('mnist_784', data_home="mnist_784")

In [2]:
# data preprocessing

num_train = 60000
num_class = 10

x_train = np.float32(mnist.data[:num_train]).T
y_train_index = np.int32(mnist.target[:num_train]).T
x_test = np.float32(mnist.data[num_train:]).T
y_test_index = np.int32(mnist.target[num_train:]).T

# Normalization

x_train /= 255
x_test /= 255
x_size = x_train.shape[0]

y_train = np.zeros((num_class, y_train_index.shape[0]))
for idx in range(y_train_index.shape[0]):
  y_train[y_train_index[idx], idx] = 1

y_test = np.zeros((num_class, y_test_index.shape[0]))
for idx in range(y_test_index.shape[0]):
  y_test[y_test_index[idx], idx] = 1    

In [10]:
#parameter initialization

hidden_size1 = 128
hidden_size2 = 64

# three-layer neural network

params = {"W1": np.random.randn(hidden_size1, x_size) * np.sqrt(1/ x_size),
          "b1": np.zeros((hidden_size1, 1)) * np.sqrt(1/x_size),
          "W2": np.random.randn(hidden_size2, hidden_size1) * np.sqrt(1/hidden_size1),
          "b2": np.zeros((hidden_size2, 1)) * np.sqrt(1/hidden_size1),
          "W3": np.random.randn(num_class, hidden_size2) * np.sqrt(1/ hidden_size2),
          "b3": np.zeros((num_class, 1)) * np.sqrt(1/ hidden_size2)
          }
# Xavier initialization: https://reniew.github.io/13/

In [11]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

def d_sigmoid(x):
  # derivative of sigmoid
  exp = np.exp(-x)
  return (exp)/((1+exp)**2)

def softmax(x):
  exp = np.exp(x)
  return exp/np.sum(exp, axis=0)

def compute_loss(y_true, y_pred):
  # loss calculation

  num_sample = y_true.shape[1]
  Li = -1 * np.sum(y_true * np.log(y_pred))
  
  return Li/num_sample

def foward_pass(x, params):
  
  params["S1"] = np.dot(params["W1"], x) + params["b1"]
  params["A1"] = sigmoid(params["S1"])
  params["S2"] = np.dot(params["W2"], params["A1"]) + params["b2"]
  params["A2"] = softmax(params["S2"])
  params["S3"] = np.dot(params["W3"], params["A2"]) + params["b3"]
  params["A3"] = softmax(params["S3"])

  return params

def foward_pass_test(x, params):

  params_test = {}
  
  params_test["S1"] = np.dot(params["W1"], x) + params["b1"]
  params_test["A1"] = sigmoid(params_test["S1"])
  params_test["S2"] = np.dot(params["W2"], params_test["A1"]) + params["b2"]
  params_test["A2"] = softmax(params_test["S2"])
  params_test["S3"] = np.dot(params["W3"], params_test["A2"]) + params["b3"]
  params_test["A3"] = softmax(params_test["S3"])

  return params_test

def compute_accuracy(y_true, y_pred):
  y_true_idx = np.argmax(y_true, axis = 0)
  y_pred_idx = np.argmax(y_pred, axis = 0)
  num_correct = np.sum(y_true_idx==y_pred_idx)

  accuracy = num_correct / y_true.shape[1] * 100

  return accuracy

In [12]:
epochs = 100
learning_rate = 0.5

for i in range(epochs):

  if i == 0:
    params = foward_pass(x_train, params)
    
  grads = backward_pass(x_train, y_train, params)

  params["W1"] -= learning_rate * grads["dW1"]
  params["b1"] -= learning_rate * grads["db1"]
  params["W2"] -= learning_rate * grads["dW2"]
  params["b2"] -= learning_rate * grads["db2"]
  params["W3"] -= learning_rate * grads["dW3"]
  params["b3"] -= learning_rate * grads["db3"]

  params = foward_pass(x_train, params)
  train_loss = compute_loss(y_train, params["A3"])
  train_acc = compute_accuracy(y_train, params["A3"])

  params_test = foward_pass_test(x_test, params)
  test_loss = compute_loss(y_test, params_test["A3"])
  test_acc = compute_accuracy(y_test, params_test["A3"])

  print("Epoch {}: training loss = {}, training acuracy = {}%, test loss = {}, training acuracy = {}%"
  .format(i + 1, np.round(train_loss, 6), np.round(train_acc, 2), np.round(test_loss, 6), np.round(test_acc, 2)))

Epoch 1: training loss = 2.302552, training acuracy = 9.75%, test loss = 2.302432, training acuracy = 9.74%
Epoch 2: training loss = 2.30239, training acuracy = 9.75%, test loss = 2.302267, training acuracy = 9.74%
Epoch 3: training loss = 2.302227, training acuracy = 9.75%, test loss = 2.302103, training acuracy = 9.74%
Epoch 4: training loss = 2.302065, training acuracy = 9.75%, test loss = 2.301938, training acuracy = 9.74%
Epoch 5: training loss = 2.301902, training acuracy = 9.75%, test loss = 2.301774, training acuracy = 9.74%
Epoch 6: training loss = 2.301739, training acuracy = 9.75%, test loss = 2.301609, training acuracy = 9.74%
Epoch 7: training loss = 2.301575, training acuracy = 9.75%, test loss = 2.301443, training acuracy = 9.74%
Epoch 8: training loss = 2.301412, training acuracy = 9.75%, test loss = 2.301278, training acuracy = 9.74%
Epoch 9: training loss = 2.301248, training acuracy = 9.75%, test loss = 2.301112, training acuracy = 9.74%
Epoch 10: training loss = 2.3

In [14]:
# Final test accuracy: 53.17%

## 과제 4
과제 3 부분의 성능을 지금까지 배운 지식을 바탕으로 향상시켜보세요

- Hint : Activation function, hyperparameter setting

In [105]:
# Assignment 4 구현은 여기서

# data preprocessing

num_train = 60000
num_class = 10

x_train = np.float32(mnist.data[:num_train]).T
y_train_index = np.int32(mnist.target[:num_train]).T
x_test = np.float32(mnist.data[num_train:]).T
y_test_index = np.int32(mnist.target[num_train:]).T

# Normalization

x_train /= 255
x_test /= 255
x_size = x_train.shape[0]

y_train = np.zeros((num_class, y_train_index.shape[0]))
for idx in range(y_train_index.shape[0]):
  y_train[y_train_index[idx], idx] = 1

y_test = np.zeros((num_class, y_test_index.shape[0]))
for idx in range(y_test_index.shape[0]):
  y_test[y_test_index[idx], idx] = 1    



#parameter initialization

hidden_size1 = 64
hidden_size2 = 100

# three-layer neural network

params = {"W1": np.random.randn(hidden_size1, x_size) * np.sqrt(1/ x_size),
          "b1": np.zeros((hidden_size1, 1)) * np.sqrt(1/x_size),
          "W2": np.random.randn(hidden_size2, hidden_size1) * np.sqrt(1/hidden_size1),
          "b2": np.zeros((hidden_size2, 1)) * np.sqrt(1/hidden_size1),
          "W3": np.random.randn(num_class, hidden_size2) * np.sqrt(1/ hidden_size2),
          "b3": np.zeros((num_class, 1)) * np.sqrt(1/ hidden_size2)
          }
# Xavier initialization: https://reniew.github.io/13/

In [106]:
def relu(x):
    return np.maximum(1e-4, x)

def d_relu(x):
    return np.greater(x, 1e-4).astype(int)

In [107]:
def foward_pass(x, params):
  
  params["S1"] = np.dot(params["W1"], x) + params["b1"]
  params["A1"] = sigmoid(params["S1"])
  params["S2"] = np.dot(params["W2"], params["A1"]) + params["b2"]
  params["A2"] = softmax(params["S2"])
  params["S3"] = np.dot(params["W3"], params["A2"]) + params["b3"]
  params["A3"] = softmax(params["S3"])

  return params

def backward_pass(x, y_true, params):

  dS3 = params["A3"] - y_true

  grads = {}

  grads["dW3"] =  np.dot(dS3, params["A2"].T)/x.shape[1]
  grads["db3"] =  (1/x.shape[1])*np.sum(dS3, axis=1, keepdims=True)/x.shape[1]

  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * d_sigmoid(params["S2"])

  grads["dW2"] =  np.dot(dS2, params["A1"].T)/x.shape[1]
  grads["db2"] =  (1/x.shape[1])*np.sum(dS2, axis=1, keepdims=True)/x.shape[1]

  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * d_sigmoid(params["S1"])

  grads["dW1"] = np.dot(dS1, x.T)/x.shape[1]
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True)/x.shape[1]

  return grads

In [108]:
epochs = 100
learning_rate = 0.65

for i in range(epochs):

  if i == 0:
    params = foward_pass(x_train, params)
    
  grads = backward_pass(x_train, y_train, params)

  params["W1"] -= learning_rate * grads["dW1"]
  params["b1"] -= learning_rate * grads["db1"]
  params["W2"] -= learning_rate * grads["dW2"]
  params["b2"] -= learning_rate * grads["db2"]
  params["W3"] -= learning_rate * grads["dW3"]
  params["b3"] -= learning_rate * grads["db3"]

  params = foward_pass(x_train, params)
  train_loss = compute_loss(y_train, params["A3"])
  train_acc = compute_accuracy(y_train, params["A3"])

  params_test = foward_pass_test(x_test, params)
  test_loss = compute_loss(y_test, params_test["A3"])
  test_acc = compute_accuracy(y_test, params_test["A3"])

  print("Epoch {}: training loss = {}, training acuracy = {}%, test loss = {}, training acuracy = {}%"
  .format(i + 1, np.round(train_loss, 6), np.round(train_acc, 2), np.round(test_loss, 6), np.round(test_acc, 2)))

Epoch 1: training loss = 2.302711, training acuracy = 10.11%, test loss = 2.302795, training acuracy = 9.75%
Epoch 2: training loss = 2.302607, training acuracy = 10.27%, test loss = 2.302688, training acuracy = 9.88%
Epoch 3: training loss = 2.302504, training acuracy = 10.46%, test loss = 2.302582, training acuracy = 10.02%
Epoch 4: training loss = 2.3024, training acuracy = 10.76%, test loss = 2.302475, training acuracy = 10.29%
Epoch 5: training loss = 2.302296, training acuracy = 11.14%, test loss = 2.302368, training acuracy = 10.65%
Epoch 6: training loss = 2.302191, training acuracy = 11.73%, test loss = 2.30226, training acuracy = 11.17%
Epoch 7: training loss = 2.302086, training acuracy = 12.42%, test loss = 2.302152, training acuracy = 11.74%
Epoch 8: training loss = 2.301981, training acuracy = 13.23%, test loss = 2.302044, training acuracy = 12.5%
Epoch 9: training loss = 2.301875, training acuracy = 14.12%, test loss = 2.301934, training acuracy = 13.51%
Epoch 10: traini

**무엇을 보완하였고, 왜 보완되었는지에 대한 자유 서술 (아래에)**

In [109]:
# Final training accuracy: 59.98%

# 1) Forward pass, backward pass에는 sigmoid, d_sigmoid만 사용. 즉, 변화 없음.
# (왜인지는 모르겠지만 ReLU를 사용하면 overfitting이 너무 심해서 중간에 정확도가 도로 하락함)

# 2) Hidden layer node size 변경
# 종전: 128 & 64 -> 변경: 64 & 100

# 정확도가 6%p 상승함.