# prac1 : DropOut

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as data_utils
import torchvision
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
device= 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

cpu


In [3]:
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform = transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform = transforms.ToTensor(), download=True)

In [4]:
batch_size = 100

train_loader = DataLoader(dataset=train_dataset, batch_size = batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size = batch_size, shuffle=False)

In [5]:
## model ##
class MLP_Dropout(nn.Module):
    def __init__(self):
        super(MLP_Dropout, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)
        self.dp1 = nn.Dropout(p = 0.4)
        self.dp2 = nn.Dropout(p = 0.2)
        
    def forward(self, x):
        h1 = F.relu(self.fc1(x))
        h1dp = self.dp1(h1)
        h2 = F.relu(self.fc2(h1dp))
        h2dp = self.dp2(h2)
        output = self.fc3(h2dp)
        return output

In [6]:
# define parameter, model
num_epochs = 10
learning_rate = 0.01

model = MLP_Dropout().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
## model train
loss_list = []

for epoch_num in range(num_epochs):
    average_cost = 0
    model.train() # set model for train
    
    for batch_idx, (x_data, y_label) in enumerate(train_loader):
        num_of_mini_batch = len(train_loader)
        
        x_data = x_data.reshape(-1, 28*28)
        input_image = x_data.to(device)
        label = y_label.to(device)
        optimizer.zero_grad()
        y_predict = model(input_image)
        loss = criterion(y_predict, label)
        loss.backward()
        optimizer.step()
        
        average_cost = average_cost + (loss.item() / num_of_mini_batch)
        loss_list.append(loss)
        
    print("Epoch {} Loss {:.5f}".format((epoch_num+1), average_cost))

Epoch 1 Loss 0.37455
Epoch 2 Loss 0.27538
Epoch 3 Loss 0.24111
Epoch 4 Loss 0.23204
Epoch 5 Loss 0.23526
Epoch 6 Loss 0.21981
Epoch 7 Loss 0.21843
Epoch 8 Loss 0.21434
Epoch 9 Loss 0.21188
Epoch 10 Loss 0.19830


In [8]:
## model validation
with torch.no_grad():
    num_total_data = 0
    correct = 0 
    model.eval() # set model for test
    
    for batch_idx, (images, labels) in enumerate(test_loader):
        images = images.reshape(-1, 28*28)
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        outputs_softmax = F.softmax(outputs, dim=1)
        predicted = torch.argmax(outputs_softmax, dim=1)
        
        num_total_data += len(images)
        answer = sum(labels==predicted).item()
        correct += answer
        
print("Model accuracy {:.5f}%".format((correct / num_total_data)*100))

Model accuracy 96.69000%


# prac2 : Batch normalization

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as data_utils
import torchvision
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
import random

In [10]:
device= 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

cpu


In [11]:
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform = transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform = transforms.ToTensor(), download=True)

In [12]:
batch_size = 100

train_loader = DataLoader(dataset=train_dataset, batch_size = batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size = batch_size, shuffle=False)

In [13]:
# define Model
class Batch_Norm(nn.Module):
    def __init__(self):
        super(Batch_Norm, self).__init__()
        
        self.network = nn.Sequential(nn.Linear(784, 256),
                                    nn.BatchNorm1d(256),
                                    nn.ReLU(),
                                    nn.Linear(256, 64),
                                    nn.BatchNorm1d(64),
                                    nn.ReLU(),
                                    nn.Linear(64, 10))
        
    def forward(self, x):
        output = self.network(x)
        return output

In [14]:
# define hyperparameters
num_epochs = 10
learning_rate = 0.01

model = Batch_Norm().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [15]:
## model train
loss_list = []

for epoch_num in range(num_epochs):
    average_cost = 0
    model.train() # set model for train
    
    for batch_idx, (x_data, y_label) in enumerate(train_loader):
        num_of_mini_batch = len(train_loader)
        
        x_data = x_data.reshape(-1, 28*28)
        input_image = x_data.to(device)
        label = y_label.to(device)
        optimizer.zero_grad()
        y_predict = model(input_image)
        loss = criterion(y_predict, label)
        loss.backward()
        optimizer.step()
        
        average_cost = average_cost + (loss.item() / num_of_mini_batch)
        loss_list.append(loss)
        
    print("Epoch {} Loss {:.5f}".format((epoch_num+1), average_cost))

Epoch 1 Loss 0.19244
Epoch 2 Loss 0.08821
Epoch 3 Loss 0.06432
Epoch 4 Loss 0.05081
Epoch 5 Loss 0.04461
Epoch 6 Loss 0.03755
Epoch 7 Loss 0.03308
Epoch 8 Loss 0.02928
Epoch 9 Loss 0.02511
Epoch 10 Loss 0.02480


In [16]:
## model validation
with torch.no_grad():
    num_total_data = 0
    correct = 0 
    model.eval() # set model for test
    
    for batch_idx, (images, labels) in enumerate(test_loader):
        images = images.reshape(-1, 28*28)
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        outputs_softmax = F.softmax(outputs, dim=1)
        predicted = torch.argmax(outputs_softmax, dim=1)
        
        num_total_data += len(images)
        answer = sum(labels==predicted).item()
        correct += answer
        
print("Model accuracy {:.5f}%".format((correct / num_total_data)*100))

Model accuracy 97.85000%


# Homework : Generate a classification model for MNIST dataset (99%)

- model 성능 : 최종 98.28%
- model 구조
    1. input layer 1개, hidden layer 2개, output layer 1개로 총 4개
    2. activation function : leaky ReLU 사용
    3. epoch은 30, learning rate 는 0.05로 설정한 후 overfitting 을 막기 위해 dropout 방법 사용
    4. 실습때 사용해본 dropout과 batch nomalization을 함께 사용해봄. (다다익선일 수 있으니 두가지의 규제방법 함께 사용)


- 규제방법을 두가지를 함께 사용했으니 좀 더 학습시켜도 될 것 같아 hidden layer 를 하나 더 추가해보았고 epoch 수도 늘려보았다. ReLU function도 음수일때 activation zero 가 되는 문제가 있다 했으니 leaky ReLU 로 변경해 사용해보았다. 실습에서 구축한 모델에 비해 성능은 약간 좋아졌지만, 큰 차이가 나지 않는다. 
    

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.utils.data as data_utils
import torchvision
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
import random

In [18]:
device= 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

cpu


In [19]:
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform = transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform = transforms.ToTensor(), download=True)

In [20]:
batch_size = 100

train_loader = DataLoader(dataset=train_dataset, batch_size = batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size = batch_size, shuffle=False)

In [26]:
## model2 ##
class MLP_Dropout(nn.Module):
    def __init__(self):
        super(MLP_Dropout, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        self.dp1 = nn.Dropout(p = 0.4)
        self.dp2 = nn.Dropout(p = 0.2)
        self.dp3 = nn.Dropout(p = 0.1)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(64)
        
    def forward(self, x):
        h1 = F.leaky_relu(self.bn1(self.fc1(x)))
        h1dp = self.dp1(h1)
        h2 = F.leaky_relu(self.bn2(self.fc2(h1dp)))
        h2dp = self.dp2(h2)
        h3 = F.leaky_relu(self.bn3(self.fc3(h2dp)))
        h3dp = self.dp3(h3)
        output = self.fc4(h3dp)
        return output

In [43]:
# define hyperparameters
num_epochs = 30
learning_rate = 0.005

model = Batch_Norm().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [44]:
## model train
loss_list = []

for epoch_num in range(num_epochs):
    average_cost = 0
    model.train() # set model for train
    
    for batch_idx, (x_data, y_label) in enumerate(train_loader):
        num_of_mini_batch = len(train_loader)
        
        x_data = x_data.reshape(-1, 28*28)
        input_image = x_data.to(device)
        label = y_label.to(device)
        optimizer.zero_grad()
        y_predict = model(input_image)
        loss = criterion(y_predict, label)
        loss.backward()
        optimizer.step()
        
        average_cost = average_cost + (loss.item() / num_of_mini_batch)
        loss_list.append(loss)
        
    print("Epoch {} Loss {:.5f}".format((epoch_num+1), average_cost))

Epoch 1 Loss 0.18972
Epoch 2 Loss 0.08377
Epoch 3 Loss 0.05802
Epoch 4 Loss 0.04754
Epoch 5 Loss 0.03720
Epoch 6 Loss 0.03243
Epoch 7 Loss 0.02795
Epoch 8 Loss 0.02512
Epoch 9 Loss 0.02433
Epoch 10 Loss 0.01980
Epoch 11 Loss 0.01772
Epoch 12 Loss 0.01779
Epoch 13 Loss 0.01542
Epoch 14 Loss 0.01266
Epoch 15 Loss 0.01631
Epoch 16 Loss 0.01287
Epoch 17 Loss 0.01284
Epoch 18 Loss 0.01271
Epoch 19 Loss 0.01049
Epoch 20 Loss 0.01165
Epoch 21 Loss 0.01248
Epoch 22 Loss 0.00763
Epoch 23 Loss 0.00821
Epoch 24 Loss 0.00933
Epoch 25 Loss 0.00937
Epoch 26 Loss 0.00836
Epoch 27 Loss 0.00792
Epoch 28 Loss 0.00670
Epoch 29 Loss 0.00974
Epoch 30 Loss 0.00859


In [45]:
## model validation
with torch.no_grad():
    num_total_data = 0
    correct = 0 
    model.eval() # set model for test
    
    for batch_idx, (images, labels) in enumerate(test_loader):
        images = images.reshape(-1, 28*28)
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        outputs_softmax = F.softmax(outputs, dim=1)
        predicted = torch.argmax(outputs_softmax, dim=1)
        
        num_total_data += len(images)
        answer = sum(labels==predicted).item()
        correct += answer
        
print("Model accuracy {:.5f}%".format((correct / num_total_data)*100))

Model accuracy 98.28000%
