## hw2-1 : MNIST dataset에 대해 분류하는 모델 2가지 생성(torch.nn.Module)
실습의 torch.nn.Module 을 사용하였고, activation function 을 ReLU 에서 Sigmoid function 으로 변경한 모델로 test

In [1]:
# MNIST dataset 에 대한 분류문제
# import package
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as data_utils

# MNIST dataset 을 불러오기 위해 import
import torchvision 
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
# Device selection: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# Seed Python's and PyTorch's random number generators with a fixed value
# so that repeated runs draw the same random numbers.
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    # Also seed every CUDA device when running on the GPU.
    torch.cuda.manual_seed_all(777)

cpu


In [3]:
# Load the MNIST train/test splits with every image converted to a tensor.
# download=True fetches the files into ./data when they are not there yet.

train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

In [4]:
# Mini-batch loaders
batch_size = 100

# Training batches are reshuffled by the loader; test batches keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
# Model definition - activation function: sigmoid
class MLP_model(torch.nn.Module):
    """Fully-connected classifier with two sigmoid-activated hidden layers.

    Args:
        input_size: Number of features in each (flattened) input sample.
        num_classes: Number of output scores produced per sample.
        hidden_size: Width of both hidden layers. Defaults to 256, the value
            that used to be hard-coded, so existing callers are unaffected.
    """

    def __init__(self, input_size, num_classes, hidden_size=256):
        super().__init__()
        self.input_size = input_size
        self.num_classes = num_classes
        self.hidden_size = hidden_size

        # Linear layers (fc: fully-connected)
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.fc2 = torch.nn.Linear(hidden_size, hidden_size)
        self.fc3 = torch.nn.Linear(hidden_size, num_classes)

        # Activation shared by both hidden layers (it holds no parameters).
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return the raw class scores (logits) for ``x``.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor whose last dimension equals ``num_classes``. No softmax is
            applied here; the output of the last linear layer is returned.
        """
        hidden = self.sigmoid(self.fc1(x))
        hidden = self.sigmoid(self.fc2(hidden))
        return self.fc3(hidden)
        

In [6]:
# Hyperparameters
input_size = 28 * 28  # flattened image length (784)
num_classes = 10
num_epochs = 10
learning_rate = 0.1

In [7]:
# Build the model, the loss function and the optimizer
model = MLP_model(input_size, num_classes)
model = model.to(device)  # place the model on the selected device
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [8]:
# Model training
loss_list = []  # loss of every mini-batch, stored as plain floats
num_of_mini_batch = len(train_loader)  # does not change during training, so computed once
for epoch_num in range(num_epochs):
    average_cost = 0  # accumulates (batch loss / number of batches) -> epoch mean

    for x_data, y_label in train_loader:
        x_data = x_data.reshape(-1, 28*28)  # [100, 1, 28, 28] -> [100, 784]
        input_image = x_data.to(device)
        label = y_label.to(device)

        optimizer.zero_grad()
        y_predict = model(input_image)
        loss = criterion(y_predict, label)
        loss.backward()
        optimizer.step()

        # Convert to a Python float once: appending the tensor itself would keep
        # an autograd-attached object alive for every mini-batch.
        batch_loss = loss.item()
        average_cost = average_cost + (batch_loss/num_of_mini_batch)
        loss_list.append(batch_loss)

    print("Epoch {} Loss {:.5f}".format((epoch_num+1), average_cost))

Epoch 1 Loss 2.23597
Epoch 2 Loss 1.10197
Epoch 3 Loss 0.59271
Epoch 4 Loss 0.44549
Epoch 5 Loss 0.39027
Epoch 6 Loss 0.35969
Epoch 7 Loss 0.34006
Epoch 8 Loss 0.32449
Epoch 9 Loss 0.31045
Epoch 10 Loss 0.29731


In [9]:
# Validation
# Switch to evaluation mode. The layers used by this model behave the same in
# both modes, but this keeps the cell correct if dropout/batch-norm are added.
model.eval()
with torch.no_grad():  # evaluation only: skip gradient tracking to reduce work
    num_total_data = 0
    correct = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28)
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Softmax is monotonic, so the arg-max of the raw scores is already
        # the predicted class; no explicit softmax is needed.
        predicted = torch.argmax(outputs, dim=1)

        num_total_data += len(images)
        # Tensor-level sum instead of Python's builtin sum(), which would
        # iterate over the tensor one element at a time.
        correct += (labels == predicted).sum().item()

print("Model accuracy {:.5f}%".format((correct/num_total_data)*100))

Model accuracy 91.65000%


## hw2-2 : MNIST dataset에 대해 분류하는 모델 2가지 생성 (sequential API 사용)
실습의 sequential API를 사용하였고, activation function 을 ReLU 에서 Sigmoid function 으로 변경한 모델로 test

sigmoid function을 사용하여 test를 했을 때 정확도가 약 11%로 매우 낮게 나왔기 때문에(10개 클래스에 대한 무작위 추측 수준이며, 학습 중 loss도 ln(10) ≈ 2.30 근처에서 거의 줄어들지 않음), 같은 모델에서 activation function만을 ReLU로 변경해서 비교를 진행하였다.

같은 layer 로 구성되어있는 모델에서 activation function만 변경해서 비교했을 때 MNIST dataset 에 대한 accuracy가 

- sigmoid function : 약 11%
- ReLU function : 약 98%

정도로 차이가 많이 난다.


### 1) activation function을 sigmoid로 사용했을 때

In [10]:
# A different way of defining the model (Sequential API) - activation function: sigmoid
linear1 = torch.nn.Linear(784, 256)
linear2 = torch.nn.Linear(256, 256)
linear3 = torch.nn.Linear(256, 64)
linear4 = torch.nn.Linear(64, 64)
linear5 = torch.nn.Linear(64, 10)
sigmoid = nn.Sigmoid()
relu = nn.ReLU()  # created here but not used by the sigmoid model below

# Every linear layer except the last is followed by the sigmoid activation;
# the finished model is then moved to the selected device.
model = torch.nn.Sequential(
    linear1, sigmoid,
    linear2, sigmoid,
    linear3, sigmoid,
    linear4, sigmoid,
    linear5,
).to(device)

In [11]:
# Hyperparameters, loss function and optimizer
# NOTE(review): the training loop that follows iterates over `num_epochs`;
# `epoch` is assigned here but is not read by the visible code.
epoch = 10
learning_rate = 0.1
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [12]:
# Model training
loss_list = []  # loss of every mini-batch, stored as plain floats
num_of_mini_batch = len(train_loader)  # does not change during training, so computed once
for epoch_num in range(num_epochs):
    average_cost = 0  # accumulates (batch loss / number of batches) -> epoch mean

    for x_data, y_label in train_loader:
        x_data = x_data.reshape(-1, 28*28)  # [100, 1, 28, 28] -> [100, 784]
        input_image = x_data.to(device)
        label = y_label.to(device)

        optimizer.zero_grad()
        y_predict = model(input_image)
        loss = criterion(y_predict, label)
        loss.backward()
        optimizer.step()

        # Convert to a Python float once: appending the tensor itself would keep
        # an autograd-attached object alive for every mini-batch.
        batch_loss = loss.item()
        average_cost = average_cost + (batch_loss/num_of_mini_batch)
        loss_list.append(batch_loss)

    print("Sigmoid : Epoch {} Loss {:.5f}".format((epoch_num+1), average_cost))

Sigmoid : Epoch 1 Loss 2.30542
Sigmoid : Epoch 2 Loss 2.30511
Sigmoid : Epoch 3 Loss 2.30485
Sigmoid : Epoch 4 Loss 2.30450
Sigmoid : Epoch 5 Loss 2.30443
Sigmoid : Epoch 6 Loss 2.30409
Sigmoid : Epoch 7 Loss 2.30384
Sigmoid : Epoch 8 Loss 2.30358
Sigmoid : Epoch 9 Loss 2.30365
Sigmoid : Epoch 10 Loss 2.30313


In [13]:
# Validation
# Switch to evaluation mode. The layers used by this model behave the same in
# both modes, but this keeps the cell correct if dropout/batch-norm are added.
model.eval()
with torch.no_grad():  # evaluation only: skip gradient tracking to reduce work
    num_total_data = 0
    correct = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28)
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Softmax is monotonic, so the arg-max of the raw scores is already
        # the predicted class; no explicit softmax is needed.
        predicted = torch.argmax(outputs, dim=1)

        num_total_data += len(images)
        # Tensor-level sum instead of Python's builtin sum(), which would
        # iterate over the tensor one element at a time.
        correct += (labels == predicted).sum().item()

print("Model (with sigmoid) accuracy {:.5f}%".format((correct/num_total_data)*100))

Model (with sigmoid) accuracy 11.35000%


### 2) activation function을 ReLU로 사용했을 때

In [14]:
# A different way of defining the model (Sequential API) - activation function: ReLU
linear1 = torch.nn.Linear(784, 256)
linear2 = torch.nn.Linear(256, 256)
linear3 = torch.nn.Linear(256, 64)
linear4 = torch.nn.Linear(64, 64)
linear5 = torch.nn.Linear(64, 10)
sigmoid = nn.Sigmoid()  # created here but not used by the ReLU model below
relu = nn.ReLU()

# Every linear layer except the last is followed by the ReLU activation;
# the finished model is then moved to the selected device.
model = torch.nn.Sequential(
    linear1, relu,
    linear2, relu,
    linear3, relu,
    linear4, relu,
    linear5,
).to(device)

In [15]:
# Hyperparameters, loss function and optimizer
# NOTE(review): the training loop that follows iterates over `num_epochs`;
# `epoch` is assigned here but is not read by the visible code.
epoch = 10
learning_rate = 0.1
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [16]:
# Model training
loss_list = []  # loss of every mini-batch, stored as plain floats
num_of_mini_batch = len(train_loader)  # does not change during training, so computed once
for epoch_num in range(num_epochs):
    average_cost = 0  # accumulates (batch loss / number of batches) -> epoch mean

    for x_data, y_label in train_loader:
        x_data = x_data.reshape(-1, 28*28)  # [100, 1, 28, 28] -> [100, 784]
        input_image = x_data.to(device)
        label = y_label.to(device)

        optimizer.zero_grad()
        y_predict = model(input_image)
        loss = criterion(y_predict, label)
        loss.backward()
        optimizer.step()

        # Convert to a Python float once: appending the tensor itself would keep
        # an autograd-attached object alive for every mini-batch.
        batch_loss = loss.item()
        average_cost = average_cost + (batch_loss/num_of_mini_batch)
        loss_list.append(batch_loss)

    print("ReLU : Epoch {} Loss {:.5f}".format((epoch_num+1), average_cost))

ReLU : Epoch 1 Loss 1.26870
ReLU : Epoch 2 Loss 0.23227
ReLU : Epoch 3 Loss 0.13513
ReLU : Epoch 4 Loss 0.09763
ReLU : Epoch 5 Loss 0.07550
ReLU : Epoch 6 Loss 0.06166
ReLU : Epoch 7 Loss 0.04886
ReLU : Epoch 8 Loss 0.03918
ReLU : Epoch 9 Loss 0.03211
ReLU : Epoch 10 Loss 0.02705


In [17]:
# Validation
# Switch to evaluation mode. The layers used by this model behave the same in
# both modes, but this keeps the cell correct if dropout/batch-norm are added.
model.eval()
with torch.no_grad():  # evaluation only: skip gradient tracking to reduce work
    num_total_data = 0
    correct = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28)
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Softmax is monotonic, so the arg-max of the raw scores is already
        # the predicted class; no explicit softmax is needed.
        predicted = torch.argmax(outputs, dim=1)

        num_total_data += len(images)
        # Tensor-level sum instead of Python's builtin sum(), which would
        # iterate over the tensor one element at a time.
        correct += (labels == predicted).sum().item()

print("Model (with ReLU) accuracy {:.5f}%".format((correct/num_total_data)*100))

Model (with ReLU) accuracy 97.72000%
