In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 과제 1
ReLU activation function과 derivative function을 구현해보세요
- Hint : np.maximum 함수 사용하면 편리합니다
- 다른 방법 사용하셔도 무방합니다


In [None]:
def relu(x):
  """Rectified linear unit: element-wise max(0, x).

  Works on scalars and array-likes through NumPy broadcasting; negative
  entries are clamped to zero and non-negative entries pass through.
  """
  return np.maximum(0, x)

In [None]:
def d_relu(x):
  """Derivative of ReLU: 1 where x > 0, otherwise 0 (including at x == 0).

  Args:
    x: A scalar or array-like of pre-activation values.

  Returns:
    For scalar input, a plain Python int (0 or 1). For array input, an
    integer ndarray of the same shape holding the element-wise derivative.
  """
  values = np.asarray(x)
  # Element-wise comparison: a bare `if x > 0` raises for multi-element
  # arrays, which is exactly what backpropagation passes in.
  grad = np.where(values > 0, 1, 0)
  # Keep the scalar-in / scalar-out behaviour for scalar callers.
  return grad.item() if grad.ndim == 0 else grad

## 과제 2
Deep Learning Basic 코드 파일의 MLP implementation with Numpy library using MNIST dataset 코드 참고해서
Three layer MLP 일 때의 backward_pass 함수를 완성해주세요.   
- Hint : 코드 파일의 예시는 Two layer MLP


In [None]:
def backward_pass(x, y_true, params):
  """Backpropagate through a three-layer MLP and return parameter gradients.

  Args:
    x: Input batch. `x.shape[1]` is used as the number of samples, so
      samples are assumed to be stored column-wise — TODO confirm against
      the forward pass.
    y_true: Targets, subtracted element-wise from `params["A3"]`.
    params: Dict providing the weights "W2" and "W3", the pre-activations
      "S1" and "S2", and the activations "A1", "A2" and "A3".

  Returns:
    Dict with keys "dW1", "db1", "dW2", "db2", "dW3", "db3"; every entry
    is averaged over the batch exactly once.
  """
  n_samples = x.shape[1]
  grads = {}

  # Output-layer error.
  # NOTE(review): A3 - y_true is the gradient w.r.t. S3 for a softmax +
  # cross-entropy output; presumably that is the output layer used here.
  dS3 = params["A3"] - y_true

  grads["dW3"] = np.dot(dS3, params["A2"].T) / n_samples
  # Average over the batch once (previously this was divided by the batch
  # size twice, shrinking db3 relative to db1/db2).
  grads["db3"] = np.sum(dS3, axis=1, keepdims=True) / n_samples

  # Propagate the error back through layer 3's weights, then through the
  # second hidden layer's ReLU.
  dA2 = np.dot(params["W3"].T, dS3)
  dS2 = dA2 * d_relu(params["S2"])

  grads["dW2"] = np.dot(dS2, params["A1"].T) / n_samples
  grads["db2"] = np.sum(dS2, axis=1, keepdims=True) / n_samples

  # NOTE(review): the first hidden layer is differentiated with d_sigmoid
  # while the second uses d_relu, and d_sigmoid is not defined in the
  # visible cells — confirm this matches the forward pass's activation.
  dA1 = np.dot(params["W2"].T, dS2)
  dS1 = dA1 * d_sigmoid(params["S1"])

  grads["dW1"] = np.dot(dS1, x.T) / n_samples
  grads["db1"] = np.sum(dS1, axis=1, keepdims=True) / n_samples

  return grads

## 과제 3
Deep Learning Basic 코드 파일의 MLP implementation with Pytorch library using MNIST dataset 코드 참고해서
Three layer MLP를 구현한 후, 학습을 돌려 보세요

hyperparameter는 다음과 같이 설정

- epochs : 100
- hiddensize : 128, 64 (two layer)
- learning_rate : 0.5

In [None]:
# Assignment 3 구현은 여기서 ()

In [None]:
from torchvision import transforms, datasets
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

In [None]:
# Preprocessing pipeline applied to every sample: convert the image to a tensor.
transform = transforms.Compose([
    transforms.ToTensor()
])

In [None]:
# MNIST training split, stored under ./.data/ (downloaded on request via
# download=True) and passed through `transform`.
trainset = datasets.MNIST(
    root      = './.data/', 
    train     = True,
    download  = True,
    transform = transform
)
# MNIST test split (train=False), same storage location and preprocessing.
testset = datasets.MNIST(
    root      = './.data/', 
    train     = False,
    download  = True,
    transform = transform
)

In [None]:
# Number of samples per mini-batch for both loaders.
BATCH_SIZE = 512
# Create one DataLoader each for the train set and the test set.
# shuffle=True is passed so the data order is shuffled.
train_loader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
test_loader =  DataLoader(testset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
class Net(nn.Module):
    """Three-layer MLP: 784 -> 128 -> 64 -> 10 with ReLU between layers.

    `forward` returns the raw output of the last linear layer (no final
    activation).
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept so repr() and parameters() are unchanged.
        self.layer1 = nn.Linear(784, 128)
        self.layer2 = nn.Linear(128, 64)
        self.layer3 = nn.Linear(64, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Flatten the input to rows of 784 features and run the three layers."""
        flat = x.view(-1, 784)
        hidden1 = self.relu(self.layer1(flat))
        hidden2 = self.relu(self.layer2(hidden1))
        return self.layer3(hidden2)

In [None]:
# Instantiate the network; the bare `model` expression makes the cell
# display its layer summary.
model = Net()
model

Net(
  (layer1): Linear(in_features=784, out_features=128, bias=True)
  (layer2): Linear(in_features=128, out_features=64, bias=True)
  (layer3): Linear(in_features=64, out_features=10, bias=True)
  (relu): ReLU()
)

In [None]:
list(model.parameters()) # lets us inspect the weight matrices directly
                         # requires_grad=True in the output marks a tensor as trainable

[Parameter containing:
 tensor([[ 0.0044,  0.0067, -0.0316,  ...,  0.0087,  0.0148,  0.0047],
         [ 0.0049, -0.0003,  0.0028,  ..., -0.0088, -0.0177, -0.0105],
         [-0.0218, -0.0031,  0.0189,  ..., -0.0156, -0.0233, -0.0086],
         ...,
         [-0.0130,  0.0155,  0.0198,  ...,  0.0018, -0.0025, -0.0299],
         [ 0.0130,  0.0128,  0.0343,  ...,  0.0174, -0.0022,  0.0129],
         [-0.0235, -0.0335, -0.0069,  ..., -0.0110,  0.0164,  0.0124]],
        requires_grad=True), Parameter containing:
 tensor([-2.9174e-02,  2.9836e-02,  2.5720e-02,  1.8705e-02, -3.3364e-02,
         -2.2398e-02,  1.3973e-02,  1.0171e-02, -2.5284e-02, -2.3829e-02,
          9.5126e-03, -2.4020e-02, -1.1689e-02,  2.1498e-02,  1.9857e-02,
         -3.5679e-02, -2.0950e-02,  2.9340e-03, -2.0010e-07,  3.5664e-02,
         -3.2953e-03,  2.1708e-02,  1.3812e-02, -8.0775e-03, -1.6894e-02,
          2.8457e-03, -2.4153e-02,  2.8532e-02, -2.7169e-02,  9.9044e-03,
          3.0376e-02,  2.3820e-02,  4.059

In [None]:
# Loss function and optimizer shared by the training/evaluation cells:
# cross-entropy loss, and plain SGD over all of `model`'s parameters with
# the learning rate of 0.5 that the assignment text prescribes.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.5)

In [None]:
def train(model, train_loader, optimizer, loss_fn=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module to train; switched to training mode.
        train_loader: Iterable yielding `(data, target)` batches.
        optimizer: Optimizer bound to `model`'s parameters.
        loss_fn: Callable `(output, target) -> scalar loss tensor`. When
            omitted, the notebook-level `criterion` is used, which keeps
            existing three-argument calls working.

    Returns:
        The average of the per-batch losses as a Python float.
    """
    if loss_fn is None:
        # Fall back to the loss object defined in a separate notebook cell.
        loss_fn = criterion

    model.train()
    # One scalar loss value per batch.
    batch_losses = []

    for data, target in train_loader:
        # Reset gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Forward pass and loss against the ground-truth labels.
        output = model(data)
        loss = loss_fn(output, target)
        # Store a plain float: keeping the tensor itself would keep every
        # batch's autograd graph alive until the end of the epoch.
        batch_losses.append(loss.item())

        # Backward pass, then parameter update.
        loss.backward()
        optimizer.step()

    # Mean loss per batch.
    avg_loss = sum(batch_losses) / len(batch_losses)

    return avg_loss

In [None]:
def evaluate(model, test_loader, loss_fn=None):
    """Evaluate `model` on `test_loader` without tracking gradients.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        test_loader: Iterable yielding `(data, target)` batches. It must
            expose `.dataset`, whose length is the accuracy denominator.
        loss_fn: Callable `(output, target) -> scalar loss tensor`. When
            omitted, the notebook-level `criterion` is used, which keeps
            existing two-argument calls working.

    Returns:
        Tuple `(avg_loss, accuracy)`: the mean per-batch loss as a Python
        float and the accuracy as a percentage in [0, 100].
    """
    if loss_fn is None:
        # Fall back to the loss object defined in a separate notebook cell.
        loss_fn = criterion

    # Switch the model to evaluation mode.
    model.eval()

    batch_losses = []
    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            # Predicted class scores.
            output = model(data)

            # Same loss as in training, stored as a plain float.
            batch_losses.append(loss_fn(output, target).item())

            # The predicted class is the index of the highest score per row.
            pred = output.argmax(dim=1, keepdim=True)

            # Count the predictions that match their labels.
            correct += pred.eq(target.view_as(pred)).sum().item()

    # Mean loss per batch.
    avg_loss = sum(batch_losses) / len(batch_losses)

    # Accuracy in percent over the whole dataset.
    accuracy = 100. * correct / len(test_loader.dataset)

    return avg_loss, accuracy

In [None]:
# Number of passes over the training set (value prescribed by the assignment).
EPOCHS = 100

# After each training epoch, evaluate on the test loader and print the
# train loss, test loss and test accuracy for that epoch.
for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 0.7838	Test Loss: 0.4859	Accuracy: 84.67%
[2] Train Loss: 0.2416	Test Loss: 0.2676	Accuracy: 91.20%
[3] Train Loss: 0.1680	Test Loss: 0.1622	Accuracy: 94.99%
[4] Train Loss: 0.1304	Test Loss: 0.1739	Accuracy: 94.45%
[5] Train Loss: 0.1099	Test Loss: 0.1412	Accuracy: 95.28%
[6] Train Loss: 0.0920	Test Loss: 0.1131	Accuracy: 96.52%
[7] Train Loss: 0.0757	Test Loss: 0.1063	Accuracy: 96.83%
[8] Train Loss: 0.0697	Test Loss: 0.1673	Accuracy: 94.46%
[9] Train Loss: 0.0591	Test Loss: 0.0758	Accuracy: 97.54%
[10] Train Loss: 0.0544	Test Loss: 0.1305	Accuracy: 96.00%
[11] Train Loss: 0.0476	Test Loss: 0.0817	Accuracy: 97.32%
[12] Train Loss: 0.0406	Test Loss: 0.0777	Accuracy: 97.55%
[13] Train Loss: 0.0359	Test Loss: 0.1197	Accuracy: 96.20%
[14] Train Loss: 0.0332	Test Loss: 0.1211	Accuracy: 96.31%
[15] Train Loss: 0.0296	Test Loss: 0.0740	Accuracy: 97.73%
[16] Train Loss: 0.0250	Test Loss: 0.0771	Accuracy: 97.61%
[17] Train Loss: 0.0221	Test Loss: 0.0700	Accuracy: 97.98%
[18] T

## 과제 4
과제 3 부분의 성능을 지금까지 배운 지식을 바탕으로 향상시켜보세요

- Hint : Activation function, hyperparameter setting

In [None]:
# Assignment 4 구현은 여기서 ()

In [None]:
class Net(nn.Module):
    """Three-layer MLP: 784 -> 128 -> 64 -> 10 with ReLU between layers.

    `forward` returns raw logits. `nn.CrossEntropyLoss` applies log-softmax
    internally, so feeding it softmax probabilities normalises twice,
    flattens the gradients and keeps the loss from dropping towards zero.
    Use `predict_proba` when class probabilities are needed.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(784,128)
        self.layer2 = nn.Linear(128,64)
        self.layer3 = nn.Linear(64,10)
        self.relu = nn.ReLU()
        # Kept for inference-time probabilities only; not used in forward().
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        """Return unnormalised class scores with one row per input sample."""
        x = x.view(-1, 784)
        out = self.layer1(x)
        out = self.relu(out)
        out = self.layer2(out)
        out = self.relu(out)
        # No softmax here: the loss function expects logits.
        out = self.layer3(out)

        return out

    def predict_proba(self, x):
        """Return class probabilities (each row sums to 1) for `x`."""
        return self.softmax(self.forward(x))

In [None]:
# Rebind `model` to a fresh instance of the Net class defined for
# Assignment 4; the bare expression displays its layer summary.
model = Net()
model

Net(
  (layer1): Linear(in_features=784, out_features=128, bias=True)
  (layer2): Linear(in_features=128, out_features=64, bias=True)
  (layer3): Linear(in_features=64, out_features=10, bias=True)
  (relu): ReLU()
  (softmax): Softmax(dim=1)
)

In [None]:
# Recreate the loss and optimizer so the optimizer is bound to the
# parameters of the newly created `model`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.5)

In [None]:
# Number of passes over the training set.
EPOCHS = 100

# After each training epoch, evaluate on the test loader and print the
# train loss, test loss and test accuracy for that epoch.
for epoch in range(1, EPOCHS + 1):
    train_loss = train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Train Loss: {:.4f}\tTest Loss: {:.4f}\tAccuracy: {:.2f}%'.format(
          epoch, train_loss, test_loss, test_accuracy))

[1] Train Loss: 2.2004	Test Loss: 1.9302	Accuracy: 59.94%
[2] Train Loss: 1.7552	Test Loss: 1.7187	Accuracy: 76.00%
[3] Train Loss: 1.6552	Test Loss: 1.6492	Accuracy: 82.04%
[4] Train Loss: 1.6330	Test Loss: 1.6284	Accuracy: 83.98%
[5] Train Loss: 1.6122	Test Loss: 1.5825	Accuracy: 88.97%
[6] Train Loss: 1.5679	Test Loss: 1.5751	Accuracy: 89.57%
[7] Train Loss: 1.5550	Test Loss: 1.5595	Accuracy: 90.84%
[8] Train Loss: 1.5465	Test Loss: 1.5512	Accuracy: 91.55%
[9] Train Loss: 1.5403	Test Loss: 1.5543	Accuracy: 91.14%
[10] Train Loss: 1.5349	Test Loss: 1.5404	Accuracy: 92.48%
[11] Train Loss: 1.5300	Test Loss: 1.5434	Accuracy: 92.28%
[12] Train Loss: 1.5262	Test Loss: 1.5279	Accuracy: 93.67%
[13] Train Loss: 1.5222	Test Loss: 1.5234	Accuracy: 94.29%
[14] Train Loss: 1.5196	Test Loss: 1.5271	Accuracy: 93.87%
[15] Train Loss: 1.5165	Test Loss: 1.5327	Accuracy: 93.23%
[16] Train Loss: 1.5140	Test Loss: 1.5168	Accuracy: 94.79%
[17] Train Loss: 1.5113	Test Loss: 1.5172	Accuracy: 94.75%
[18] T

**무엇을 보완하였고, 왜 보완되었는지에 대한 자유 서술 (아래에)**

softmax 활성화 함수는 다중 분류에 효과적인 활성화 함수이다.

이 활성화 함수를 마지막 층에 적용시켰다.

그 결과 Accuracy가 97.16%로 향상되었다.