In [38]:
import numpy as np
import pandas as pd
import torch

from torchvision import transforms, datasets
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

## 과제 1
ReLu activation function과 derivative function을 구현해보세요
- Hint : np.maximum 함수 사용하면 편리합니다
- 다른 방법 사용하셔도 무방합니다


In [39]:
def relu(x):
  out = np.maximum(0, x)
  return out

In [40]:
def d_relu(x):
  out = np.where(x>0, 1, 0)
  return out

## 과제 2
Deep Learning Basic 코드 파일의 MLP implementation with Numpy library using MNIST dataset 코드 참고해서
Three layer MLP 일 때의 backward_pass 함수를 완성해주세요.   
- Hint : 코드 파일의 예시는 Two layer MLP


In [59]:
def backward_pass(x, y_true, params):
  grads = {}

  dS3 = params["A3"] - y_true
  
  grads["dW3"] =  np.dot(dS3, params["A2"].T)/x.shape[1]
  grads["db3"] =  (1/x.shape[1])*np.sum(dS3, axis=1, keepdims=True)/x.shape[1]

  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * d_sigmoid(params["S2"])

  grads["dW2"] =  np.dot(dS2, params["A1"].T)/x.shape[1]
  grads["db2"] =  (1/x.shape[1])*np.sum(dS2, axis=1, keepdims=True)/x.shape[1]

  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * d_sigmoid(params["S1"])

  grads["dW1"] = np.dot(dS1, x.T)/x.shape[1]
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True)/x.shape[1]
  
  return  grads

## 과제 3
Deep Learning Basic 코드 파일의 MLP implementation with Pytorch library using MNIST dataset 코드 참고해서
Three layer MLP를 구한후, 학습을 돌려 보세요

hyperparameter는 다음과 같이 설정

- epochs : 100
- hiddensize : 128, 64 (two layer)
- learning_rate : 0.5

In [42]:
# Assignment 3 구현은 여기서 ()

In [43]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(784,128)
        self.layer2 = nn.Linear(128,64)
        self.layer3 = nn.Linear(64,10)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = x.view(-1, 784)
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        out = self.relu(out)
        out = self.layer3(out)

        return out

In [44]:
from IPython import get_ipython
get_ipython().magic('reset -sf')

import numpy as np
import sklearn.datasets

mnist = sklearn.datasets.fetch_openml('mnist_784', data_home="mnist_784")

In [45]:
# data preprocessing

num_train = 60000
num_class = 10

x_train = np.float32(mnist.data[:num_train]).T
y_train_index = np.int32(mnist.target[:num_train]).T
x_test = np.float32(mnist.data[num_train:]).T
y_test_index = np.int32(mnist.target[num_train:]).T

# Normalization

x_train /= 255
x_test /= 255
x_size = x_train.shape[0]

y_train = np.zeros((num_class, y_train_index.shape[0]))
for idx in range(y_train_index.shape[0]):
  y_train[y_train_index[idx], idx] = 1

y_test = np.zeros((num_class, y_test_index.shape[0]))
for idx in range(y_test_index.shape[0]):
  y_test[y_test_index[idx], idx] = 1    

In [46]:
#parameter initialization

hidden1_size = 128
hidden2_size = 64
num_class = 10

In [47]:
params = {"W1": np.random.randn(hidden1_size, x_size) * np.sqrt(1/ x_size),
          "b1": np.zeros((hidden1_size, 1)) * np.sqrt(1/ x_size),
          "W2": np.random.randn(hidden2_size, hidden1_size) * np.sqrt(1/ hidden1_size),
          "b2": np.zeros((hidden2_size, 1)) * np.sqrt(1/ hidden1_size),
          "W3": np.random.randn(num_class, hidden2_size) * np.sqrt(1/ hidden2_size),
          "b3": np.zeros((num_class, 1)) * np.sqrt(1/ hidden2_size)
          }

In [51]:
def compute_loss(y_true, y_pred):
  # loss calculation

  num_sample = y_true.shape[1]
  Li = -1 * np.sum(y_true * np.log(y_pred))
  
  return Li/num_sample

In [52]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

def d_sigmoid(x):
  # derivative of sigmoid
  exp = np.exp(-x)
  return (exp)/((1+exp)**2)

def softmax(x):
  exp = np.exp(x)
  return exp/np.sum(exp, axis=0)

In [53]:
def foward_pass(x, params):
  
  params["S1"] = np.dot(params["W1"], x) + params["b1"]
  params["A1"] = sigmoid(params["S1"])
  params["S2"] = np.dot(params["W2"], params["A1"]) + params["b2"]
  params["A2"] = softmax(params["S2"])
  params["S3"] = np.dot(params["W3"], params["A2"]) + params["b3"]
  params["A3"] = sigmoid(params["S3"])

  return params

In [63]:
def foward_pass_test(x, params):

  params_test = {}
  
  params_test["S1"] = np.dot(params["W1"], x) + params["b1"]
  params_test["A1"] = sigmoid(params_test["S1"])
  params_test["S2"] = np.dot(params["W2"], params_test["A1"]) + params["b2"]
  params_test["A2"] = softmax(params_test["S2"])
  params_test["S3"] = np.dot(params["W3"], params_test["A2"]) + params["b3"]
  params_test["A3"] = sigmoid(params_test["S3"])

  return params_test

In [55]:
def compute_accuracy(y_true, y_pred):
  y_true_idx = np.argmax(y_true, axis = 0)
  y_pred_idx = np.argmax(y_pred, axis = 0)
  num_correct = np.sum(y_true_idx==y_pred_idx)

  accuracy = num_correct / y_true.shape[1] * 100

  return accuracy

In [60]:
def backward_pass(x, y_true, params):
  grads = {}

  dS3 = params["A3"] - y_true
  
  grads["dW3"] =  np.dot(dS3, params["A2"].T)/x.shape[1]
  grads["db3"] =  (1/x.shape[1])*np.sum(dS3, axis=1, keepdims=True)/x.shape[1]

  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * d_sigmoid(params["S2"])

  grads["dW2"] =  np.dot(dS2, params["A1"].T)/x.shape[1]
  grads["db2"] =  (1/x.shape[1])*np.sum(dS2, axis=1, keepdims=True)/x.shape[1]

  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * d_sigmoid(params["S1"])

  grads["dW1"] = np.dot(dS1, x.T)/x.shape[1]
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True)/x.shape[1]
  
  return  grads

In [64]:
epochs = 100
learning_rate = 0.5

for i in range(epochs):

  if i == 0:
    params = foward_pass(x_train, params)
    
  grads = backward_pass(x_train, y_train, params)

  params["W1"] -= learning_rate * grads["dW1"]
  params["b1"] -= learning_rate * grads["db1"]
  params["W2"] -= learning_rate * grads["dW2"]
  params["b2"] -= learning_rate * grads["db2"]
  params["W3"] -= learning_rate * grads["dW3"]
  params["b3"] -= learning_rate * grads["db3"]


  params = foward_pass(x_train, params)
  train_loss = compute_loss(y_train, params["A3"])
  train_acc = compute_accuracy(y_train, params["A3"])

  params_test = foward_pass_test(x_test, params)
  test_loss = compute_loss(y_test, params_test["A3"])
  test_acc = compute_accuracy(y_test, params_test["A3"])

  print("Epoch {}: training loss = {}, training acuracy = {}%, test loss = {}, training acuracy = {}%"
  .format(i + 1, np.round(train_loss, 6), np.round(train_acc, 2), np.round(test_loss, 6), np.round(test_acc, 2)))

Epoch 1: training loss = 0.721277, training acuracy = 9.86%, test loss = 0.72135, training acuracy = 9.58%
Epoch 2: training loss = 0.727627, training acuracy = 9.86%, test loss = 0.727716, training acuracy = 9.58%
Epoch 3: training loss = 0.733397, training acuracy = 9.86%, test loss = 0.733498, training acuracy = 9.58%
Epoch 4: training loss = 0.738871, training acuracy = 9.86%, test loss = 0.738983, training acuracy = 9.58%
Epoch 5: training loss = 0.744188, training acuracy = 9.86%, test loss = 0.74431, training acuracy = 9.58%
Epoch 6: training loss = 0.749417, training acuracy = 9.86%, test loss = 0.749549, training acuracy = 9.58%
Epoch 7: training loss = 0.754594, training acuracy = 9.86%, test loss = 0.754735, training acuracy = 9.58%
Epoch 8: training loss = 0.759737, training acuracy = 9.86%, test loss = 0.759887, training acuracy = 9.58%
Epoch 9: training loss = 0.764858, training acuracy = 9.86%, test loss = 0.765017, training acuracy = 9.58%
Epoch 10: training loss = 0.76

## 과제 4
과제 3 부분의 성능을 지금까지 배운 지식을 바탕으로 향상시켜보세요

- Hint : Activation function, hyperparameter setting

In [None]:
# Assignment 4 구현은 여기서 ()


In [68]:
from IPython import get_ipython
get_ipython().magic('reset -sf')

import numpy as np
import sklearn.datasets

mnist = sklearn.datasets.fetch_openml('mnist_784', data_home="mnist_784")

# data preprocessing

num_train = 60000
num_class = 10

x_train = np.float32(mnist.data[:num_train]).T
y_train_index = np.int32(mnist.target[:num_train]).T
x_test = np.float32(mnist.data[num_train:]).T
y_test_index = np.int32(mnist.target[num_train:]).T

# Normalization

x_train /= 255
x_test /= 255
x_size = x_train.shape[0]

y_train = np.zeros((num_class, y_train_index.shape[0]))
for idx in range(y_train_index.shape[0]):
  y_train[y_train_index[idx], idx] = 1

y_test = np.zeros((num_class, y_test_index.shape[0]))
for idx in range(y_test_index.shape[0]):
  y_test[y_test_index[idx], idx] = 1    

In [76]:
def relu(x):
  out = np.maximum(0, x)
  return out

In [74]:
def foward_pass(x, params):
  
  params["S1"] = np.dot(params["W1"], x) + params["b1"]
  params["A1"] = relu(params["S1"])
  params["S2"] = np.dot(params["W2"], params["A1"]) + params["b2"]
  params["A2"] = relu(params["S2"])
  params["S3"] = np.dot(params["W3"], params["A2"]) + params["b3"]
  params["A3"] = relu(params["S3"])

  return params

In [80]:
def foward_pass_test(x, params):

  params_test = {}
  
  params_test["S1"] = np.dot(params["W1"], x) + params["b1"]
  params_test["A1"] = relu(params_test["S1"])
  params_test["S2"] = np.dot(params["W2"], params_test["A1"]) + params["b2"]
  params_test["A2"] = relu(params_test["S2"])
  params_test["S3"] = np.dot(params["W3"], params_test["A2"]) + params["b3"]
  params_test["A3"] = relu(params_test["S3"])

  return params_test

In [69]:
#parameter initialization

hidden1_size = 128
hidden2_size = 64
num_class = 10

params = {"W1": np.random.randn(hidden1_size, x_size) * np.sqrt(1/ x_size),
          "b1": np.zeros((hidden1_size, 1)) * np.sqrt(1/ x_size),
          "W2": np.random.randn(hidden2_size, hidden1_size) * np.sqrt(1/ hidden1_size),
          "b2": np.zeros((hidden2_size, 1)) * np.sqrt(1/ hidden1_size),
          "W3": np.random.randn(num_class, hidden2_size) * np.sqrt(1/ hidden2_size),
          "b3": np.zeros((num_class, 1)) * np.sqrt(1/ hidden2_size)
          }

In [70]:
def compute_loss(y_true, y_pred):
  # loss calculation

  num_sample = y_true.shape[1]
  Li = -1 * np.sum(y_true * np.log(y_pred))
  
  return Li/num_sample

In [71]:
def compute_accuracy(y_true, y_pred):
  y_true_idx = np.argmax(y_true, axis = 0)
  y_pred_idx = np.argmax(y_pred, axis = 0)
  num_correct = np.sum(y_true_idx==y_pred_idx)

  accuracy = num_correct / y_true.shape[1] * 100

  return accuracy

In [78]:
def backward_pass(x, y_true, params):
  grads = {}

  dS3 = params["A3"] - y_true
  
  grads["dW3"] =  np.dot(dS3, params["A2"].T)/x.shape[1]
  grads["db3"] =  (1/x.shape[1])*np.sum(dS3, axis=1, keepdims=True)/x.shape[1]

  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * relu(params["S2"])

  grads["dW2"] =  np.dot(dS2, params["A1"].T)/x.shape[1]
  grads["db2"] =  (1/x.shape[1])*np.sum(dS2, axis=1, keepdims=True)/x.shape[1]

  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * relu(params["S1"])

  grads["dW1"] = np.dot(dS1, x.T)/x.shape[1]
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True)/x.shape[1]
  
  return  grads

In [None]:
epochs = 100
learning_rate = 0.3

for i in range(epochs):

  if i == 0:
    params = foward_pass(x_train, params)
    
  grads = backward_pass(x_train, y_train, params)

  params["W1"] -= learning_rate * grads["dW1"]
  params["b1"] -= learning_rate * grads["db1"]
  params["W2"] -= learning_rate * grads["dW2"]
  params["b2"] -= learning_rate * grads["db2"]
  params["W3"] -= learning_rate * grads["dW3"]
  params["b3"] -= learning_rate * grads["db3"]


  params = foward_pass(x_train, params)
  train_loss = compute_loss(y_train, params["A3"])
  train_acc = compute_accuracy(y_train, params["A3"])

  params_test = foward_pass_test(x_test, params)
  test_loss = compute_loss(y_test, params_test["A3"])
  test_acc = compute_accuracy(y_test, params_test["A3"])

  print("Epoch {}: training loss = {}, training acuracy = {}%, test loss = {}, training acuracy = {}%"
  .format(i + 1, np.round(train_loss, 6), np.round(train_acc, 2), np.round(test_loss, 6), np.round(test_acc, 2)))

  """
  """


Epoch 1: training loss = nan, training acuracy = 10.4%, test loss = nan, training acuracy = 10.23%
Epoch 2: training loss = nan, training acuracy = 12.62%, test loss = nan, training acuracy = 12.83%
Epoch 3: training loss = nan, training acuracy = 15.07%, test loss = nan, training acuracy = 15.36%
Epoch 4: training loss = nan, training acuracy = 17.68%, test loss = nan, training acuracy = 18.02%
Epoch 5: training loss = nan, training acuracy = 20.27%, test loss = nan, training acuracy = 20.84%
Epoch 6: training loss = nan, training acuracy = 23.09%, test loss = nan, training acuracy = 23.72%
Epoch 7: training loss = nan, training acuracy = 25.61%, test loss = nan, training acuracy = 26.5%
Epoch 8: training loss = nan, training acuracy = 28.14%, test loss = nan, training acuracy = 28.97%
Epoch 9: training loss = nan, training acuracy = 30.38%, test loss = nan, training acuracy = 31.62%
Epoch 10: training loss = nan, training acuracy = 32.42%, test loss = nan, training acuracy = 33.77%
E

**무엇을 보완하였고, 왜 보완되었는지에 대한 자유 서술 (아래에)**

학습률을 낮추고 (0.2)로 활성화함수를 relu로 바꾸었다.
그랬더니 정확도가 엄청나게 올랐다.
relu의 성능을 느낄 수 있었다.
그리고 loss를 계산할 때, 값이 전부 nan이 나왔는데, relu함수가 0을 반환하기도 해서 그런 결과가 나온 거 같다