In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# MNIST dataset: both splits are downloaded under ../data when missing, and
# every image is converted to a tensor by ToTensor().
train_data = datasets.MNIST("../data", train=True, transform=transforms.ToTensor(), download=True)
test_data = datasets.MNIST("../data", train=False, transform=transforms.ToTensor(), download=True)

# Mini-batch loaders: only the training split is shuffled.
train_loader = DataLoader(dataset=train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=128, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data\MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data\MNIST\raw\train-images-idx3-ubyte.gz to ../data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data\MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data\MNIST\raw\train-labels-idx1-ubyte.gz to ../data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data\MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data\MNIST\raw\t10k-images-idx3-ubyte.gz to ../data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data\MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ../data\MNIST\raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [2]:
# Model
class Model(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
        num_classes: Number of scores produced for each input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.mlp1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.mlp2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the un-normalized class scores for the batch ``x``."""
        return self.mlp2(self.relu(self.mlp1(x)))

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# 28*28*1 inputs (one value per pixel), 100 hidden units, 10 output classes.
model = Model(28*28*1, 100, 10)
model = model.to(device)

In [5]:
# Cross-entropy loss on the model outputs, minimized with Adam.
CELoss = torch.nn.CrossEntropyLoss()
adam_optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [6]:
# Neural-network training setup.
# Report how many mini-batches the training loader yields per epoch.
print('number of iteration:', len(train_loader))
# Number of full passes over the training data.
total_epochs = 3

number of iteration: 469


In [7]:
# epoch: one complete pass over the training data
for epoch in range(total_epochs):
    # iteration: one optimization step on a single mini-batch
    for step, (images, labels) in enumerate(train_loader):
        # images arrive as [mini-batch, 1, 28, 28] and labels as [mini-batch];
        # each image is flattened to a row of 28*28 values for the MLP.
        flat_images = images.to(device).reshape(-1, 28*28)
        targets = labels.to(device)

        # Clear gradients left over from the previous step.
        adam_optimizer.zero_grad()

        # Forward pass
        outputs = model(flat_images)
        ce_loss = CELoss(outputs, targets)

        # Back-propagate and update the parameters.
        ce_loss.backward()
        adam_optimizer.step()

    # The reported loss is the one from the last mini-batch of the epoch.
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, ce_loss.item()))

Epoch [1/3], Loss: 0.2355
Epoch [2/3], Loss: 0.1047
Epoch [3/3], Loss: 0.1042


In [8]:
# Measure the accuracy of the trained model on the test set.
# No back-propagation happens during testing, so gradient tracking is
# disabled to save memory.
was_training = model.training  # remember the current mode so it can be restored
model.eval()  # layers such as Dropout / BatchNorm must run in inference mode
try:
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)
            outputs = model(images)
            # Index of the highest score along dim 1 is the predicted class.
            # (`.data` is unnecessary inside no_grad and is a legacy API.)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
finally:
    # Put the model back into whatever mode it was in before evaluation.
    model.train(was_training)

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 95.87 %


In [9]:
# Save the trained model's parameters (state_dict) to model_basic.ckpt
torch.save(obj=model.state_dict(), f='model_basic.ckpt')

In [10]:
# 최적화 함수

In [11]:
# Each optimizer below is built over the same model parameters.

# Plain stochastic gradient descent
sgd_optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2)

# Stochastic gradient descent with a momentum term
sgd_with_momentum_optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-2, momentum=0.9)

# Adagrad
Adagrad_optimizer = torch.optim.Adagrad(params=model.parameters(), lr=1e-2)

# RMSprop
RMSprop_optimizer = torch.optim.RMSprop(params=model.parameters(), lr=1e-2)

# Adam (note the smaller learning rate than the optimizers above)
adam_optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [13]:
# Dropout

In [None]:
# Train the neural network with the plain SGD optimizer.
total_epochs = 3
# Iterate over epochs, not over the number of mini-batches: the original
# range(len(train_loader)) ran one epoch per batch and printed epoch numbers
# larger than total_epochs.
for epoch in range(total_epochs):
    for images, labels in train_loader:
        # Flatten every image to a row of 28*28 values for the MLP.
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        ce_loss = CELoss(outputs, labels)

        # Backward pass and parameter update
        sgd_optimizer.zero_grad()
        ce_loss.backward()  # back-propagation
        sgd_optimizer.step()  # apply the SGD update

    # The reported loss is the one from the last mini-batch of the epoch.
    print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, total_epochs, ce_loss.item()))

In [None]:
# Model with dropout added
class Model(nn.Module):
    """Two-layer perceptron with dropout: Linear -> Dropout -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
        num_classes: Number of scores produced for each input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.mlp1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        # p is the probability of zeroing each element (0.5 is also the default).
        self.dropout = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()
        self.mlp2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the un-normalized class scores for the batch ``x``."""
        # Dropout is applied to the hidden pre-activations, then ReLU.
        hidden = self.dropout(self.mlp1(x))
        return self.mlp2(self.relu(hidden))

# Prefer CUDA when it is available; fall back to the CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Fresh (untrained) instance of the Model class, moved to the chosen device.
model = Model(28*28*1, 100, 10)
model = model.to(device)

In [None]:
# Batch Normalization

In [None]:
# Model with batch normalization added
class Model(nn.Module):
    """Two-layer perceptron with batch norm: Linear -> BatchNorm1d -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer (also the BatchNorm1d feature count).
        num_classes: Number of scores produced for each input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(Model, self).__init__()
        self.mlp1 = nn.Linear(input_size, hidden_size)
        self.bn = nn.BatchNorm1d(hidden_size)  # 1-D batch normalization over the hidden features
        self.relu = nn.ReLU()
        self.mlp2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the un-normalized class scores for the batch ``x``."""
        out = self.mlp1(x)
        # Apply the batch-norm layer defined in __init__. The original called
        # self.dropout, which does not exist on this class (AttributeError).
        out = self.bn(out)
        out = self.relu(out)
        out = self.mlp2(out)
        return out

# Choose the compute device: CUDA if present, else CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Fresh (untrained) instance of the Model class, moved to the chosen device.
model = Model(28*28*1, 100, 10)
model = model.to(device)