In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
# from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import numpy as np

import torchvision
import torchvision.transforms as transforms

In [None]:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
# NOTE(review): none of the cells visible here move the model or the batches to `device`.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Hyperparameters
input_size = 28 * 28                    # 784: width the image batches are flattened to before fc1
hidden_size1, hidden_size2 = 400, 100   # widths of the two hidden layers
num_classes = 10                        # same as the output size
num_epochs = 5
batch_size = 64
learning_rate = 1e-3

In [None]:
# MNIST dataset
# Per-sample preprocessing: convert to a tensor, then normalize with mean 0.5 and std 1.0.
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (1.0,)),
])

# Both splits live under ./mnist and are downloaded on first use.
train_dataset = torchvision.datasets.MNIST('./mnist', train=True, transform=trans, download=True)
test_dataset = torchvision.datasets.MNIST('./mnist', train=False, transform=trans, download=True)

# Data loaders (input pipeline): shuffle only the training split.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


<h1 id="Before-Batch-normalization,-Dropout">Before Batch normalization, Dropout<a class="anchor-link" href="#Before-Batch-normalization,-Dropout">¶</a></h1>


In [None]:
# Neural Network Model (2 hidden layers + linear output layer)
class Net(nn.Module):
    """Fully connected classifier: input -> hidden1 -> hidden2 -> class logits.

    Args:
        input_size: Number of features in each flattened input sample.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        num_classes: Number of output classes (length of the logit vector).
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, num_classes)
        # Only used by predict_proba(); normalizes over the last (class) axis.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of flattened inputs.

        Softmax is deliberately NOT applied here: nn.CrossEntropyLoss performs
        log-softmax internally, so feeding it probabilities would apply softmax
        twice, flattening gradients and bounding the loss away from zero.
        The argmax of the logits equals the argmax of the probabilities.
        """
        out = self.relu(self.fc1(x))
        out = self.relu(self.fc2(out))
        return self.fc3(out)

    def predict_proba(self, x):
        """Return class probabilities (softmax over the logits of `x`)."""
        return self.softmax(self(x))
    
# Build the network from the hyperparameters defined earlier in the notebook.
net = Net(
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    num_classes=num_classes,
)

In [None]:
# Optimizer and loss: Adam over every parameter of `net`, scored with cross-entropy.
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model
net.train()  # make sure any Dropout / BatchNorm layers are in training mode

# len(train_loader) counts the final partial batch as well; the previous
# len(train_dataset) // batch_size under-reported the total by one whenever the
# dataset size is not a multiple of batch_size.
steps_per_epoch = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each image into a vector of length input_size
        images = images.view(-1, input_size)

        # Forward + backward + optimize
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 100 steps
        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, steps_per_epoch, loss.item()))
print('Training Completed')

In [None]:
# Test the model
correct = 0
total = 0
true_of_wrong = []  # ground-truth labels of the misclassified samples
pred_of_wrong = []  # what the network predicted for those samples

was_training = net.training
net.eval()  # inference behaviour for any Dropout / BatchNorm layers in the model
try:
    with torch.no_grad():  # evaluation needs no autograd graph
        for images, labels in test_loader:
            images = images.view(-1, input_size)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            missed = predicted != labels
            total += labels.size(0)
            correct += (~missed).sum().item()  # keep `correct` a plain int
            # Collect the misclassified pairs batch-wise instead of growing a
            # NumPy array one element at a time (which is quadratic).
            true_of_wrong.extend(labels[missed].tolist())
            pred_of_wrong.extend(predicted[missed].tolist())
finally:
    net.train(was_training)  # leave the model in the mode it was found in

# Integer NumPy arrays: true labels and the corresponding wrong predictions
label = np.array(true_of_wrong, dtype=np.int64)
wrong_prediction = np.array(pred_of_wrong, dtype=np.int64)

# Guard against an empty loader; report the real sample count and two decimals
accuracy = 100.0 * correct / total if total else 0.0
print('Accuracy of the network on the %d test images: %.2f %%' % (total, accuracy))

In [None]:
# Print the misclassified samples: true labels first, then the predicted labels
print(label, wrong_prediction, sep='\n')

# Check that both arrays contain the same number of entries
print(len(label), len(wrong_prediction), sep='\n')

In [None]:
# Neural network information: display the model's repr as the cell output
net


<h1 id="After-Batch-normalization,-Dropout">After Batch normalization, Dropout<a class="anchor-link" href="#After-Batch-normalization,-Dropout">¶</a></h1>


In [None]:
# Neural Network Model (2 hidden layers with batch normalization and dropout)
class Net(nn.Module):
    """Fully connected classifier with BatchNorm and Dropout on both hidden layers.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout; the final
    Linear layer produces class logits. Call ``.eval()`` before inference so
    Dropout is disabled and BatchNorm uses its running statistics.

    Args:
        input_size: Number of features in each flattened input sample.
        hidden_size1: Width of the first hidden layer.
        hidden_size2: Width of the second hidden layer.
        num_classes: Number of output classes (length of the logit vector).
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.batch1 = nn.BatchNorm1d(hidden_size1)  # takes the number of features as input
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.batch2 = nn.BatchNorm1d(hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, num_classes)
        self.relu = nn.ReLU()
        # Only used by predict_proba(); normalizes over the last (class) axis.
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout()  # default drop probability (0.5)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of flattened inputs.

        Dropout is applied after each hidden activation; previously the layer
        was constructed but never called, so the model had no dropout at all.
        Softmax is deliberately NOT applied: nn.CrossEntropyLoss performs
        log-softmax internally, so feeding it probabilities would apply
        softmax twice.
        """
        out = self.dropout(self.relu(self.batch1(self.fc1(x))))
        out = self.dropout(self.relu(self.batch2(self.fc2(out))))
        return self.fc3(out)

    def predict_proba(self, x):
        """Return class probabilities (softmax over the logits of `x`)."""
        return self.softmax(self(x))
    
# Build the network from the hyperparameters defined earlier in the notebook.
net = Net(
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    num_classes=num_classes,
)

In [None]:
# Optimizer and loss: Adam over every parameter of `net`, scored with cross-entropy.
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the model
net.train()  # BatchNorm must use batch statistics and Dropout must be active while training

# len(train_loader) counts the final partial batch as well; the previous
# len(train_dataset) // batch_size under-reported the total by one whenever the
# dataset size is not a multiple of batch_size.
steps_per_epoch = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten each image into a vector of length input_size
        images = images.view(-1, input_size)

        # Forward + backward + optimize
        optimizer.zero_grad()  # clear gradients accumulated by the previous step
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the current mini-batch loss every 100 steps
        if (i + 1) % 100 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, steps_per_epoch, loss.item()))

print('Training Completed')

In [None]:
# Test the model
correct = 0
total = 0
true_of_wrong = []  # ground-truth labels of the misclassified samples
pred_of_wrong = []  # what the network predicted for those samples

was_training = net.training
# Inference mode: BatchNorm switches to its running statistics and Dropout is
# disabled. Without this the test predictions depend on batch composition.
net.eval()
try:
    with torch.no_grad():  # evaluation needs no autograd graph
        for images, labels in test_loader:
            images = images.view(-1, input_size)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            missed = predicted != labels
            total += labels.size(0)
            correct += (~missed).sum().item()  # keep `correct` a plain int
            # Collect the misclassified pairs batch-wise instead of growing a
            # NumPy array one element at a time (which is quadratic).
            true_of_wrong.extend(labels[missed].tolist())
            pred_of_wrong.extend(predicted[missed].tolist())
finally:
    net.train(was_training)  # leave the model in the mode it was found in

# Integer NumPy arrays: true labels and the corresponding wrong predictions
label = np.array(true_of_wrong, dtype=np.int64)
wrong_prediction = np.array(pred_of_wrong, dtype=np.int64)

# Guard against an empty loader; report the real sample count and two decimals
accuracy = 100.0 * correct / total if total else 0.0
print('Accuracy of the network on the %d test images: %.2f %%' % (total, accuracy))

In [None]:
# Print the misclassified samples: true labels first, then the predicted labels
print(label, wrong_prediction, sep='\n')

# Check that both arrays contain the same number of entries
print(len(label), len(wrong_prediction), sep='\n')

In [None]:
# Neural network information: display the model's repr as the cell output
net

In [None]:
# After adding dropout and batch normalization, the loss increased and the test-set accuracy decreased, but not by a large margin.
# Dropout and batch normalization seem worth using to prevent overfitting, speed up training, and reduce the influence of the initial weight values.