In [1]:
import numpy as np
from torchvision import transforms, datasets
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

## 과제 1
ReLu activation function과 derivative function을 구현해보세요
- Hint : np.maximum 함수 사용하면 편리합니다
- 다른 방법 사용하셔도 무방합니다


In [2]:
def relu(x):
  return np.maximum(0, x)

In [3]:
def d_relu(x):
  if np.maximum(0, x) == x:
    return 1
  else:
    return 0

## 과제 2
Deep Learning Basic 코드 파일의 MLP implementation with Numpy library using MNIST dataset 코드 참고해서
Three layer MLP 일 때의 backward_pass 함수를 완성해주세요.   
- Hint : 코드 파일의 예시는 Two layer MLP


In [None]:
def backward_pass(x, y_true, params):
  dS3 = params["A3"] - y_true

  grads = {}

  grads["dW3"] =  np.dot(dS3, params["A2"].T)/x.shape[1]
  grads["db3"] =  (1/x.shape[1])*np.sum(dS3, axis=1, keepdims=True)/x.shape[1]

  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * d_relu(params["S2"])

  grads["dW2"] =  np.dot(dS2, params["A1"].T)/x.shape[1]
  grads["db2"] =  (1/x.shape[1])*np.sum(dS2, axis=1, keepdims=True)/x.shape[1]

  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * d_relu(params["S1"])

  grads["dW1"] = np.dot(dS1, x.T)/x.shape[1]
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True)/x.shape[1]

  return grads

## 과제 3
Deep Learning Basic 코드 파일의 MLP implementation with Pytorch library using MNIST dataset 코드 참고해서
Three layer MLP를 구한후, 학습을 돌려 보세요

hyperparameter는 다음과 같이 설정

- epochs : 100
- hiddensize : 128, 64 (two layer)
- learning_rate : 0.5

In [4]:
transform = transforms.Compose([
    transforms.ToTensor()
])

In [5]:
trainset = datasets.MNIST(
    root      = './.data/', 
    train     = True,
    download  = True,
    transform = transform
)
testset = datasets.MNIST(
    root      = './.data/', 
    train     = False,
    download  = True,
    transform = transform
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./.data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/train-images-idx3-ubyte.gz to ./.data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./.data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/train-labels-idx1-ubyte.gz to ./.data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./.data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./.data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./.data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./.data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./.data/MNIST/raw



In [6]:
BATCH_SIZE = 512
# train set과 test set 각각에 대하여 DataLoader를 생성합니다.
# shuffle=True 매개변수를 넣어 데이터를 섞어주세요.
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
test_loader =  DataLoader(testset, batch_size=BATCH_SIZE, shuffle=True)

In [7]:
# Assignment 3 구현은 여기서 ()]
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(784,128) # 이미지 크기 28*28=784, 중간층 노드 개수 64 임의 지정
        self.layer2 = nn.Linear(128,64)
        self.layer3 = nn.Linear(64,10)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = x.view(-1, 784)
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        out = self.relu(out)
        out = self.layer3(out)
        out = self.relu(out)

        return out

In [8]:
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.5)

In [9]:
def train(model, train_loader, optimizer):
    model.train()
    # 배치 당 loss 값을 담을 리스트 생성
    batch_losses = []

    for data, target in train_loader:
        # 옵티마이저의 기울기 초기화
        optimizer.zero_grad()

        # y pred 값 산출
        output = model(data)
        # loss 계산
        # 정답 데이터와의 cross entropy loss 계산
        # 이 loss를 배치 당 loss로 보관
        loss = criterion(output, target)
        batch_losses.append(loss)

        # 기울기 계산
        loss.backward()

        # 가중치 업데이트!
        optimizer.step()
        
    # 배치당 평균 loss 계산
    avg_loss = sum(batch_losses) / len(batch_losses)
    
    return avg_loss

def evaluate(model, test_loader):
    # 모델을 평가 모드로 전환
    model.eval()

    batch_losses = []
    correct = 0 

    with torch.no_grad(): 
        for data, target in test_loader:
            # 예측값 생성
            output = model(data)

            # loss 계산 (이전과 동일)
            loss = criterion(output, target)
            batch_losses.append(loss)

           # Accuracy 계산
           # y pred와 y가 일치하면 correct에 1을 더해주기
            pred = output.max(1, keepdim=True)[1]

            # eq() 함수는 값이 일치하면 1을, 아니면 0을 출력.
            correct += pred.eq(target.view_as(pred)).sum().item()

    # 배치 당 평균 loss 계산 
    avg_loss =  sum(batch_losses) / len(batch_losses)

    #정확도 계산
    accuracy = 100. * correct / len(test_loader.dataset)

    return avg_loss, accuracy

In [None]:
EPOCHS = 100

for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 0.7185	Test Loss: 0.4924	Accuracy: 89.00%
[2] Train Loss: 0.4162	Test Loss: 0.8240	Accuracy: 78.17%
[3] Train Loss: 0.3874	Test Loss: 0.3585	Accuracy: 94.88%
[4] Train Loss: 0.3436	Test Loss: 0.3440	Accuracy: 95.30%
[5] Train Loss: 0.3266	Test Loss: 0.3447	Accuracy: 95.46%
[6] Train Loss: 0.3132	Test Loss: 0.4219	Accuracy: 92.56%
[7] Train Loss: 0.3028	Test Loss: 0.3434	Accuracy: 95.71%
[8] Train Loss: 0.2913	Test Loss: 0.3176	Accuracy: 96.16%
[9] Train Loss: 0.2873	Test Loss: 0.4452	Accuracy: 92.02%
[10] Train Loss: 0.3384	Test Loss: 0.3039	Accuracy: 96.97%
[11] Train Loss: 0.2797	Test Loss: 0.4140	Accuracy: 93.61%
[12] Train Loss: 0.2791	Test Loss: 0.2988	Accuracy: 97.21%
[13] Train Loss: 0.2672	Test Loss: 0.3287	Accuracy: 96.11%
[14] Train Loss: 0.2655	Test Loss: 0.2936	Accuracy: 97.27%
[15] Train Loss: 0.2596	Test Loss: 0.2947	Accuracy: 97.35%
[16] Train Loss: 0.2578	Test Loss: 0.3115	Accuracy: 97.11%
[17] Train Loss: 0.2543	Test Loss: 0.3054	Accuracy: 97.15%
[18] T

## 과제 4
과제 3 부분의 성능을 지금까지 배운 지식을 바탕으로 향상시켜보세요

- Hint : Activation function, hyperparameter setting

In [None]:
# Assignment 4 구현은 여기서 ()
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

EPOCHS = 100

for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 2.3012	Test Loss: 2.2980	Accuracy: 13.45%
[2] Train Loss: 2.2933	Test Loss: 2.2881	Accuracy: 21.71%
[3] Train Loss: 2.2789	Test Loss: 2.2654	Accuracy: 31.69%
[4] Train Loss: 2.2461	Test Loss: 2.2225	Accuracy: 36.41%
[5] Train Loss: 2.1912	Test Loss: 2.1524	Accuracy: 36.86%
[6] Train Loss: 2.1048	Test Loss: 2.0454	Accuracy: 38.81%
[7] Train Loss: 1.9811	Test Loss: 1.9091	Accuracy: 49.07%
[8] Train Loss: 1.8431	Test Loss: 1.7651	Accuracy: 53.51%
[9] Train Loss: 1.7040	Test Loss: 1.6307	Accuracy: 54.91%
[10] Train Loss: 1.5752	Test Loss: 1.5106	Accuracy: 56.62%
[11] Train Loss: 1.4728	Test Loss: 1.4214	Accuracy: 58.15%
[12] Train Loss: 1.3979	Test Loss: 1.3608	Accuracy: 59.43%
[13] Train Loss: 1.3442	Test Loss: 1.3114	Accuracy: 60.78%
[14] Train Loss: 1.2996	Test Loss: 1.2677	Accuracy: 61.32%
[15] Train Loss: 1.2608	Test Loss: 1.2355	Accuracy: 61.72%
[16] Train Loss: 1.2297	Test Loss: 1.2056	Accuracy: 62.26%
[17] Train Loss: 1.2023	Test Loss: 1.1792	Accuracy: 62.75%
[18] T

**무엇을 보완하였고, 왜 보완되었는지에 대한 자유 서술 (아래에)**

- Learning Rate가 0.5였을 때 loss가 수렴하지 않고 계속 움직이는 경향을 보였다.
- Learning Rate가 너무 커서 발생하는 현상이라 판단하고, Learning Rate를 0.01로 줄여 진행하였다.
- 결과적으로 loss가 잘 수렴하였다.
- 하지만, 정확도가 떨어진 것이 아쉬워 ReLu 대신 LeakyRelu를 사용하고, momentum을 지정해 다시 시도해 보았다.
- 결과적으로 이전보다 정확도가 높게 나타났다.

In [15]:
BATCH_SIZE = 512
# train set과 test set 각각에 대하여 DataLoader를 생성합니다.
# shuffle=True 매개변수를 넣어 데이터를 섞어주세요.
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
test_loader =  DataLoader(testset, batch_size=BATCH_SIZE, shuffle=True)

In [16]:
# Assignment 3 구현은 여기서 ()]
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(784,128) # 이미지 크기 28*28=784, 중간층 노드 개수 64 임의 지정
        self.layer2 = nn.Linear(128,64)
        self.layer3 = nn.Linear(64,10)
        self.leakyrelu = nn.LeakyReLU()
        
    def forward(self, x):
        x = x.view(-1, 784)
        out = self.layer1(x)
        out = self.leakyrelu(out)
        out = self.layer2(out)
        out = self.leakyrelu(out)
        out = self.layer3(out)
        out = self.leakyrelu(out)

        return out

In [17]:
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [19]:
EPOCHS = 50

for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 0.0720	Test Loss: 0.0917	Accuracy: 97.27%
[2] Train Loss: 0.0693	Test Loss: 0.0914	Accuracy: 97.20%
[3] Train Loss: 0.0666	Test Loss: 0.0902	Accuracy: 97.35%
[4] Train Loss: 0.0648	Test Loss: 0.0906	Accuracy: 97.27%
[5] Train Loss: 0.0629	Test Loss: 0.0886	Accuracy: 97.28%
[6] Train Loss: 0.0608	Test Loss: 0.0841	Accuracy: 97.31%
[7] Train Loss: 0.0584	Test Loss: 0.0873	Accuracy: 97.39%
[8] Train Loss: 0.0566	Test Loss: 0.0856	Accuracy: 97.42%
[9] Train Loss: 0.0546	Test Loss: 0.0827	Accuracy: 97.44%
[10] Train Loss: 0.0531	Test Loss: 0.0837	Accuracy: 97.50%
[11] Train Loss: 0.0517	Test Loss: 0.0796	Accuracy: 97.51%
[12] Train Loss: 0.0497	Test Loss: 0.0803	Accuracy: 97.52%
[13] Train Loss: 0.0489	Test Loss: 0.0786	Accuracy: 97.60%
[14] Train Loss: 0.0466	Test Loss: 0.0787	Accuracy: 97.57%
[15] Train Loss: 0.0453	Test Loss: 0.0775	Accuracy: 97.65%
[16] Train Loss: 0.0441	Test Loss: 0.0772	Accuracy: 97.59%
[17] Train Loss: 0.0430	Test Loss: 0.0768	Accuracy: 97.74%
[18] T