## 과제 1
ReLU activation function과 그 derivative function을 구현해 보세요.
- Hint : np.maximum 함수 사용하면 편리합니다
- 다른 방법 사용하셔도 무방합니다


In [None]:
import numpy as np

def relu(x):
  """Apply the ReLU activation, max(0, x), element-wise.

  Args:
    x: A scalar or array-like of numbers.

  Returns:
    The value(s) of ``x`` with everything below zero replaced by 0.
  """
  activated = np.maximum(0, x)
  return activated

print(relu(3))
print(relu(-3))

3
0


In [None]:
def d_relu(x):
  """Derivative of ReLU: 1 where x > 0, otherwise 0.

  Works element-wise on arrays (the plain ``if x > 0`` form raises
  ValueError for arrays with more than one element), while scalar inputs
  still return a plain Python int.

  Args:
    x: A scalar or array-like of numbers.

  Returns:
    An int for scalar input, or an integer array shaped like ``x``.
  """
  grad = (np.asarray(x) > 0).astype(int)
  # Keep the scalar contract: d_relu(3) is the int 1, not a NumPy object.
  if np.ndim(grad) == 0:
    return int(grad)
  return grad

print(d_relu(3))
print(d_relu(-3))

1
0


## 과제 2
Deep Learning Basic 코드 파일의 MLP implementation with Numpy library using MNIST dataset 코드 참고해서
Three layer MLP 일 때의 backward_pass 함수를 완성해주세요.   
- Hint : 코드 파일의 예시는 Two layer MLP


In [None]:
from IPython import get_ipython

# Clear every user-defined name before starting assignment 2.
# run_line_magic replaces the deprecated InteractiveShell.magic(); the guard
# keeps the cell from crashing when it is run outside IPython, where
# get_ipython() returns None.
_shell = get_ipython()
if _shell is not None:
  _shell.run_line_magic('reset', '-sf')

import numpy as np
import sklearn.datasets

mnist = sklearn.datasets.fetch_openml('mnist_784', data_home="mnist_784")

In [None]:
# Data preprocessing: train/test split, scaling, and one-hot label encoding.

num_train = 60000  # the first num_train samples form the training set
num_class = 10

# Every array is transposed so that samples run along the second axis.
x_train = np.float32(mnist.data[:num_train]).T
y_train_index = np.int32(mnist.target[:num_train]).T
x_test = np.float32(mnist.data[num_train:]).T
y_test_index = np.int32(mnist.target[num_train:]).T

# Normalization: divide the pixel values by 255.
x_train /= 255
x_test /= 255
x_size = x_train.shape[0]

# One-hot encode the labels: column j gets a 1 in the row given by label j.
y_train = np.zeros((num_class, y_train_index.shape[0]))
y_train[y_train_index, np.arange(y_train_index.shape[0])] = 1

y_test = np.zeros((num_class, y_test_index.shape[0]))
y_test[y_test_index, np.arange(y_test_index.shape[0])] = 1

In [None]:
x_train.shape

(784, 60000)

In [None]:
# Parameter initialization for the three-layer neural network.

hidden_size1 = 128  # number of units in hidden layer 1
hidden_size2 = 64   # number of units in hidden layer 2

# Each weight matrix is drawn from a standard normal and scaled by
# sqrt(1 / fan_in) (Xavier initialization: https://reniew.github.io/13/).
# Biases start at zero; scaling an all-zero array is a no-op, so it is omitted.
params = dict(
    W1=np.random.randn(hidden_size1, x_size) * np.sqrt(1 / x_size),
    b1=np.zeros((hidden_size1, 1)),
    W2=np.random.randn(hidden_size2, hidden_size1) * np.sqrt(1 / hidden_size1),
    b2=np.zeros((hidden_size2, 1)),
    W3=np.random.randn(num_class, hidden_size2) * np.sqrt(1 / hidden_size2),
    b3=np.zeros((num_class, 1)),
)

In [None]:
def sigmoid(x):
  """Numerically stable logistic sigmoid, 1 / (1 + exp(-x)), element-wise.

  The naive form overflows in ``np.exp(-x)`` for large negative ``x``.
  Here the exponential is only ever taken of a non-positive number, and
  the algebraically equivalent form exp(x) / (1 + exp(x)) is used for
  negative inputs.

  Args:
    x: A scalar or array-like of numbers.

  Returns:
    A NumPy scalar for scalar input, or an array shaped like ``x``.
  """
  x = np.asarray(x)
  decay = np.exp(-np.abs(x))  # lies in (0, 1], so it cannot overflow
  out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
  # Indexing with () turns a 0-d result back into a scalar and leaves
  # real arrays untouched.
  return out[()]

def d_sigmoid(x):
  """Derivative of the logistic sigmoid, exp(-x) / (1 + exp(-x))**2.

  The derivative is an even function of ``x``, so it is evaluated at
  ``-|x|``. That keeps the exponential in (0, 1]; evaluating ``exp(-x)``
  directly overflows for large negative ``x`` and yields inf / inf = NaN.

  Args:
    x: A scalar or array-like of numbers.

  Returns:
    The derivative value(s), with the same shape as ``x``.
  """
  decay = np.exp(-np.abs(x))
  return decay / (1 + decay) ** 2

def softmax(x):
  """Softmax along axis 0 (each column is normalized independently).

  The maximum along axis 0 is subtracted before exponentiating. This
  leaves the result mathematically unchanged but prevents ``np.exp``
  from overflowing to inf (and the output from becoming NaN) when the
  inputs are large.

  Args:
    x: Array of scores; normalization runs over axis 0.

  Returns:
    Array shaped like ``x`` whose entries sum to 1 along axis 0.
  """
  shifted = x - np.max(x, axis=0, keepdims=True)
  exp = np.exp(shifted)
  return exp / np.sum(exp, axis=0)

In [None]:
def compute_loss(y_true, y_pred):
  """Mean cross-entropy loss over the samples (columns).

  Computes ``-sum(y_true * log(y_pred)) / num_sample``. The logarithm is
  only evaluated where ``y_true`` is non-zero, so a predicted probability
  of exactly 0 on a non-target class contributes 0 instead of turning the
  whole loss into NaN through ``0 * log(0)``.

  Args:
    y_true: Target array with one column per sample.
    y_pred: Predicted probabilities, same shape as ``y_true``.

  Returns:
    The summed cross-entropy divided by ``y_true.shape[1]``.
  """
  y_true = np.asarray(y_true)
  y_pred = np.asarray(y_pred)
  num_sample = y_true.shape[1]

  # Entries outside the mask keep their initial 0 and are never passed to log.
  log_pred = np.zeros(y_pred.shape)
  np.log(y_pred, out=log_pred, where=(y_true != 0))

  return -np.sum(y_true * log_pred) / num_sample

In [None]:
def foward_pass(x, params):
  """Run the three-layer forward pass and cache the results in ``params``.

  Layers 1 and 2 use a sigmoid activation and layer 3 uses softmax. The
  pre-activations (S1..S3) and activations (A1..A3) are written back into
  ``params``.

  Args:
    x: Input array with one column per sample.
    params: Dict holding W1, b1, W2, b2, W3, b3; it is modified in place.

  Returns:
    The same ``params`` dict, now also containing S1, A1, S2, A2, S3, A3.
  """
  pre1 = params["W1"].dot(x) + params["b1"]
  act1 = sigmoid(pre1)

  pre2 = params["W2"].dot(act1) + params["b2"]
  act2 = sigmoid(pre2)

  pre3 = params["W3"].dot(act2) + params["b3"]
  act3 = softmax(pre3)

  params.update(S1=pre1, A1=act1, S2=pre2, A2=act2, S3=pre3, A3=act3)
  return params

In [None]:
def foward_pass_test(x, params):
  """Forward pass for evaluation; ``params`` is left unmodified.

  Performs the same computation as the training forward pass but stores
  the intermediate results in a new dict.

  Args:
    x: Input array with one column per sample.
    params: Dict holding W1, b1, W2, b2, W3, b3 (read only).

  Returns:
    A new dict with keys S1, A1, S2, A2, S3, A3.
  """
  pre1 = np.dot(params["W1"], x) + params["b1"]
  act1 = sigmoid(pre1)

  pre2 = np.dot(params["W2"], act1) + params["b2"]
  act2 = sigmoid(pre2)

  pre3 = np.dot(params["W3"], act2) + params["b3"]
  act3 = softmax(pre3)

  return {"S1": pre1, "A1": act1, "S2": pre2, "A2": act2, "S3": pre3, "A3": act3}

In [None]:
def compute_accuracy(y_true, y_pred):
  """Percentage of columns whose arg-max row agrees in both arrays.

  Args:
    y_true: Target array with one column per sample.
    y_pred: Prediction array, same shape as ``y_true``.

  Returns:
    Accuracy in percent (0 to 100).
  """
  matches = np.argmax(y_true, axis=0) == np.argmax(y_pred, axis=0)
  total = y_true.shape[1]
  return matches.sum() / total * 100

In [None]:
def backward_pass(x, y_true, params):
  """Backpropagate through the three-layer MLP (sigmoid, sigmoid, softmax).

  The sigmoid derivative is taken from the cached activations as
  ``A * (1 - A)``. This avoids recomputing exponentials on the
  pre-activations, which is redundant work and can overflow to NaN.

  Args:
    x: Input array with one column per sample.
    y_true: One-hot targets with one column per sample.
    params: Dict holding the weights W2, W3 and the cached activations
      A1, A2, A3 from the forward pass.

  Returns:
    Dict with dW3, db3, dW2, db2, dW1, db1; each gradient is averaged over
    the samples and has the shape of its parameter.
  """
  num_sample = x.shape[1]
  a1, a2 = params["A1"], params["A2"]

  # Derivative of softmax combined with cross-entropy loss w.r.t. S3.
  # See http://machinelearningmechanic.com/deep_learning/2019/09/04/cross-entropy-loss-derivative.html
  # Further reading:
  #   https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795
  #   https://junstar92.tistory.com/76
  dS3 = params["A3"] - y_true

  grads = {}

  grads["dW3"] = np.dot(dS3, a2.T) / num_sample
  grads["db3"] = np.sum(dS3, axis=1, keepdims=True) / num_sample

  # Layer 2: propagate through W3, then through the sigmoid.
  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * a2 * (1 - a2)

  grads["dW2"] = np.dot(dS2, a1.T) / num_sample
  grads["db2"] = np.sum(dS2, axis=1, keepdims=True) / num_sample

  # Layer 1: propagate through W2, then through the sigmoid.
  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * a1 * (1 - a1)

  grads["dW1"] = np.dot(dS1, x.T) / num_sample
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True) / num_sample

  return grads

In [None]:
epochs = 100
learning_rate = 0.5

# Full-batch gradient descent. backward_pass reads the activations cached in
# params, so one forward pass has to run before the first gradient step.
params = foward_pass(x_train, params)

for i in range(epochs):

  grads = backward_pass(x_train, y_train, params)

  # Gradient-descent step on every weight matrix and bias vector.
  params["W3"] -= learning_rate * grads["dW3"]
  params["b3"] -= learning_rate * grads["db3"]
  params["W2"] -= learning_rate * grads["dW2"]
  params["b2"] -= learning_rate * grads["db2"]
  params["W1"] -= learning_rate * grads["dW1"]
  params["b1"] -= learning_rate * grads["db1"]

  # Refresh the cached activations with the updated weights; they feed the
  # training metrics below and the next epoch's backward pass.
  params = foward_pass(x_train, params)
  train_acc = compute_accuracy(y_train, params["A3"])
  train_loss = compute_loss(y_train, params["A3"])

  # The test-set pass uses a separate dict so the training cache is untouched.
  params_test = foward_pass_test(x_test, params)
  test_acc = compute_accuracy(y_test, params_test["A3"])
  test_loss = compute_loss(y_test, params_test["A3"])

  print(
      "Epoch {}: training loss = {}, training acuracy = {}%, test loss = {}, testing acuracy = {}%".format(
          i + 1,
          np.round(train_loss, 6),
          np.round(train_acc, 2),
          np.round(test_loss, 6),
          np.round(test_acc, 2),
      )
  )

Epoch 1: training loss = 2.334616, training acuracy = 10.44%, test loss = 2.335318, testing acuracy = 10.3%
Epoch 2: training loss = 2.300241, training acuracy = 10.5%, test loss = 2.300212, testing acuracy = 10.62%
Epoch 3: training loss = 2.294677, training acuracy = 11.36%, test loss = 2.294348, testing acuracy = 11.42%
Epoch 4: training loss = 2.292322, training acuracy = 11.24%, test loss = 2.291912, testing acuracy = 11.35%
Epoch 5: training loss = 2.289997, training acuracy = 11.24%, test loss = 2.289543, testing acuracy = 11.35%
Epoch 6: training loss = 2.287659, training acuracy = 11.24%, test loss = 2.287162, testing acuracy = 11.36%
Epoch 7: training loss = 2.285303, training acuracy = 11.25%, test loss = 2.284763, testing acuracy = 11.36%
Epoch 8: training loss = 2.282926, training acuracy = 11.28%, test loss = 2.282342, testing acuracy = 11.38%
Epoch 9: training loss = 2.280522, training acuracy = 11.34%, test loss = 2.279895, testing acuracy = 11.4%
Epoch 10: training los

## 과제 3
Deep Learning Basic 코드 파일의 MLP implementation with Pytorch library using MNIST dataset 코드 참고해서
Three layer MLP를 구현한 후, 학습을 돌려 보세요

hyperparameter는 다음과 같이 설정

- epochs : 100
- hidden size : 128, 64 (hidden layer 2개)
- learning_rate : 0.5

In [None]:
from torchvision import transforms, datasets
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Convert each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# The train and test datasets differ only in the `train` flag.
mnist_kwargs = dict(root='./.data/', download=True, transform=transform)
trainset = datasets.MNIST(train=True, **mnist_kwargs)
testset = datasets.MNIST(train=False, **mnist_kwargs)

BATCH_SIZE = 512
# Create one DataLoader per dataset; both are built with shuffle=True.
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./.data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/train-images-idx3-ubyte.gz to ./.data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./.data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/train-labels-idx1-ubyte.gz to ./.data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./.data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./.data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./.data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./.data/MNIST/raw



In [None]:
class Net(nn.Module):
    """Three-layer MLP: 784 -> 128 -> 64 -> 10 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        # 784 inputs: the images are 28*28 and grayscale.
        self.layer1 = nn.Linear(784, 128)
        self.layer2 = nn.Linear(128, 64)
        # 10 outputs: one score per digit class 0-9.
        self.layer3 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten ``x`` to rows of 784 values and return 10 raw scores per row."""
        hidden = x.view(-1, 784)
        for layer in (self.layer1, self.layer2):
            hidden = self.relu(layer(hidden))
        # The final layer has no activation; its raw output is returned.
        return self.layer3(hidden)


# Instantiate the network used for assignment 3.
model = Net()
# Bare expression: a notebook only displays it when it is the last statement
# of the cell, so here it has no visible effect.
model

criterion = nn.CrossEntropyLoss() # loss function
# Plain SGD over all model parameters with learning rate 0.5.
optimizer = optim.SGD(model.parameters(), lr=0.5)

def train(model, train_loader, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Uses the module-level ``criterion`` as the loss function.

    Args:
        model: The network to train.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        The mean of the per-batch losses, as a tensor detached from the
        autograd graph.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no batches.
    """
    # Put the model in training mode.
    model.train()

    # Per-batch loss values collected over the epoch.
    batch_losses = []

    for data, target in train_loader:
        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Predictions for this batch.
        output = model(data)

        # Cross-entropy loss against the ground-truth targets.
        loss = criterion(output, target)
        # Store a detached value: appending `loss` itself would keep every
        # batch's autograd graph alive until the end of the epoch.
        batch_losses.append(loss.detach())

        # Compute the gradients ...
        loss.backward()

        # ... and update the weights.
        optimizer.step()

    # Average loss per batch.
    avg_loss = sum(batch_losses) / len(batch_losses)

    return avg_loss

def evaluate(model, test_loader):
    """Compute the mean batch loss and accuracy of ``model`` on ``test_loader``.

    Uses the module-level ``criterion`` as the loss function.

    Args:
        model: The network to evaluate.
        test_loader: DataLoader yielding ``(data, target)`` batches; its
            ``dataset`` length is the accuracy denominator.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the accuracy in percent.
    """
    # Switch the model to evaluation mode.
    model.eval()

    losses = []
    num_correct = 0

    # No gradients are needed because no weights are updated here.
    with torch.no_grad():
        for data, target in test_loader:
            logits = model(data)
            losses.append(criterion(logits, target))

            # Index of the largest score in each row is the predicted class.
            predicted = logits.max(dim=1).indices

            # Count the predictions that match the target.
            num_correct += predicted.eq(target.view_as(predicted)).sum().item()

    # Average loss per batch.
    mean_loss = sum(losses) / len(losses)

    # Accuracy as a percentage of the whole dataset.
    accuracy = 100. * num_correct / len(test_loader.dataset)

    return mean_loss, accuracy

In [None]:
# Number of full passes over the training data.
EPOCHS = 100

# Each epoch: one training pass, then evaluation on the test loader.
for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    # Report the epoch number, mean train/test batch loss and test accuracy.
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 0.8035	Test Loss: 0.3092	Accuracy: 90.47%
[2] Train Loss: 0.2283	Test Loss: 0.2095	Accuracy: 93.02%
[3] Train Loss: 0.1662	Test Loss: 0.2527	Accuracy: 91.73%
[4] Train Loss: 0.1381	Test Loss: 0.1459	Accuracy: 95.65%
[5] Train Loss: 0.1046	Test Loss: 0.1267	Accuracy: 96.00%
[6] Train Loss: 0.0888	Test Loss: 0.0979	Accuracy: 96.91%
[7] Train Loss: 0.0769	Test Loss: 0.2920	Accuracy: 91.08%
[8] Train Loss: 0.3030	Test Loss: 0.1626	Accuracy: 94.99%
[9] Train Loss: 0.0942	Test Loss: 0.1230	Accuracy: 96.18%
[10] Train Loss: 0.0764	Test Loss: 0.1231	Accuracy: 96.30%
[11] Train Loss: 0.0667	Test Loss: 0.0853	Accuracy: 97.43%
[12] Train Loss: 0.0576	Test Loss: 0.1082	Accuracy: 96.67%
[13] Train Loss: 0.0518	Test Loss: 0.0827	Accuracy: 97.44%
[14] Train Loss: 0.0455	Test Loss: 0.1429	Accuracy: 95.36%
[15] Train Loss: 0.0414	Test Loss: 0.1027	Accuracy: 96.96%
[16] Train Loss: 0.0381	Test Loss: 0.0905	Accuracy: 97.31%
[17] Train Loss: 0.0336	Test Loss: 0.0740	Accuracy: 97.90%
[18] T

## 과제 4
과제 3 부분의 성능을 지금까지 배운 지식을 바탕으로 향상시켜보세요

- Hint : Activation function, hyperparameter setting

In [None]:
# Assignment 4 implementation goes here
## First attempt: same Net architecture, SGD with a smaller learning rate plus momentum
# A fresh Net() starts again from newly initialized weights.
model = Net()

EPOCHS = 100
new_optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # define the optimizer and learning rate
criterion = nn.CrossEntropyLoss()

for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader, new_optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    # Report the epoch number, mean train/test batch loss and test accuracy.
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 1.8139	Test Loss: 0.7945	Accuracy: 79.16%
[2] Train Loss: 0.5532	Test Loss: 0.4071	Accuracy: 88.54%
[3] Train Loss: 0.3872	Test Loss: 0.3344	Accuracy: 90.42%
[4] Train Loss: 0.3332	Test Loss: 0.3030	Accuracy: 91.15%
[5] Train Loss: 0.3014	Test Loss: 0.2768	Accuracy: 92.06%
[6] Train Loss: 0.2773	Test Loss: 0.2576	Accuracy: 92.59%
[7] Train Loss: 0.2575	Test Loss: 0.2406	Accuracy: 93.21%
[8] Train Loss: 0.2387	Test Loss: 0.2233	Accuracy: 93.51%
[9] Train Loss: 0.2243	Test Loss: 0.2093	Accuracy: 93.91%
[10] Train Loss: 0.2112	Test Loss: 0.2003	Accuracy: 94.16%
[11] Train Loss: 0.1976	Test Loss: 0.1882	Accuracy: 94.61%
[12] Train Loss: 0.1857	Test Loss: 0.1807	Accuracy: 94.75%
[13] Train Loss: 0.1765	Test Loss: 0.1713	Accuracy: 94.97%
[14] Train Loss: 0.1670	Test Loss: 0.1636	Accuracy: 95.27%
[15] Train Loss: 0.1578	Test Loss: 0.1554	Accuracy: 95.55%
[16] Train Loss: 0.1500	Test Loss: 0.1476	Accuracy: 95.79%
[17] Train Loss: 0.1427	Test Loss: 0.1420	Accuracy: 95.96%
[18] T

In [None]:
# Assignment 4 구현은 여기서 ()
## 두번째 시도
class NewNet(nn.Module):
    """Three-layer MLP: 784 -> 128 -> 64 -> 10 with LeakyReLU(0.1) between layers."""

    def __init__(self):
        super().__init__()
        # 784 inputs: the images are 28*28 and grayscale.
        self.layer1 = nn.Linear(784, 128)
        self.layer2 = nn.Linear(128, 64)
        # 10 outputs: one score per digit class 0-9.
        self.layer3 = nn.Linear(64, 10)
        self.leakyrelu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Flatten ``x`` to rows of 784 values and return 10 raw scores per row."""
        hidden = x.view(-1, 784)
        for layer in (self.layer1, self.layer2):
            hidden = self.leakyrelu(layer(hidden))
        # The final layer has no activation; its raw output is returned.
        return self.layer3(hidden)

# Second attempt: LeakyReLU network trained for fewer epochs.
model_new = NewNet()

EPOCHS = 50
new_optimizer = optim.SGD(model_new.parameters(), lr=0.01, momentum=0.9) # define the optimizer and learning rate
criterion = nn.CrossEntropyLoss()

for epoch in range(1, EPOCHS + 1):
    train_loss = train(model_new, train_loader, new_optimizer)
    test_loss, test_accuracy = evaluate(model_new, test_loader)
    
    # Report the epoch number, mean train/test batch loss and test accuracy.
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 1.7028	Test Loss: 0.6934	Accuracy: 80.10%
[2] Train Loss: 0.5068	Test Loss: 0.3845	Accuracy: 89.09%
[3] Train Loss: 0.3710	Test Loss: 0.3247	Accuracy: 90.51%
[4] Train Loss: 0.3266	Test Loss: 0.2967	Accuracy: 91.53%
[5] Train Loss: 0.2987	Test Loss: 0.2777	Accuracy: 92.06%
[6] Train Loss: 0.2762	Test Loss: 0.2565	Accuracy: 92.72%
[7] Train Loss: 0.2577	Test Loss: 0.2399	Accuracy: 93.20%
[8] Train Loss: 0.2418	Test Loss: 0.2315	Accuracy: 93.29%
[9] Train Loss: 0.2277	Test Loss: 0.2143	Accuracy: 93.92%
[10] Train Loss: 0.2136	Test Loss: 0.2019	Accuracy: 94.29%
[11] Train Loss: 0.2022	Test Loss: 0.1918	Accuracy: 94.41%
[12] Train Loss: 0.1914	Test Loss: 0.1840	Accuracy: 94.59%
[13] Train Loss: 0.1806	Test Loss: 0.1780	Accuracy: 94.91%
[14] Train Loss: 0.1712	Test Loss: 0.1687	Accuracy: 94.96%
[15] Train Loss: 0.1640	Test Loss: 0.1599	Accuracy: 95.22%
[16] Train Loss: 0.1554	Test Loss: 0.1549	Accuracy: 95.42%
[17] Train Loss: 0.1491	Test Loss: 0.1503	Accuracy: 95.54%
[18] T

**무엇을 보완하였고, 왜 보완되었는지에 대한 자유 서술 (아래에)**

In [None]:
# learning rate가 아주 큰 상황이었기 때문에 이를 조금 더 작게 해주어 loss를 최소화하는 weight로 잘 찾아갈 수 있도록 하였다.
# 또한 momentum 을 추가함으로써 학습 방향을 유지하도록 해서 안장점 등에서 빠져나오지 못하는 문제를 해결할 수 있도록 하였다.

# 하지만 test loss가 epoch 50 이후부터는 꾸준히 감소하지 않고 있음을 확인 
# 또한 정확도가 0.3%만 향상된 것(테스트 이미지 10,000장 기준 약 30장만 더 잘 분류하게 된 것)을 확인

# activation 함수로 leaky relu 적용하고 epoch 수를 줄여보았다
# test loss는 그래도 지속적으로 감소하였지만(epoch 43 전까지), 성능은 첫번째 시도때의 모델보다 오히려 안 좋아졌음을 확인하였다.