In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
%matplotlib inline
%config lilineBackend.figure_format='retina'

In [2]:
# Run on the GPU when CUDA is usable; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Both MNIST splits live under ./data/ (download=True lets torchvision fetch them),
# and every image is converted with ToTensor().
mnist_kwargs = dict(root='./data/', transform=transforms.ToTensor(), download=True)
mnist_train = datasets.MNIST(train=True, **mnist_kwargs)
mnist_test = datasets.MNIST(train=False, **mnist_kwargs)

In [4]:
# Show both dataset summaries, separated by a ten-dash rule.
print(mnist_train, '-' * 10, mnist_test, sep='\n')

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data/
    Split: Train
    StandardTransform
Transform: ToTensor()
----------
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data/
    Split: Test
    StandardTransform
Transform: ToTensor()


In [5]:
BATCH_SIZE = 256
# Training loader: reshuffle every epoch and drop the trailing partial batch so
# every optimization step sees exactly BATCH_SIZE samples.
train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=1, drop_last=True)
# Evaluation loader: keep every test sample and a fixed order. With drop_last=True
# the final partial batch (10000 % 256 = 16 images) would be excluded from the
# reported test accuracy, and shuffling adds nothing to an accuracy count.
test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=BATCH_SIZE, shuffle=False, num_workers=1, drop_last=False)

# <font color='red'>DataLoader</font>
* ## dataset
    * 데이터를 불러올 Dataset 객체, 아래 두 가지 유형을 지원
    * map-style dataset : index가 존재, \_\_getitem\_\_ 과 \_\_len\_\_ 선언 필요
    * iterable-style dataset : \_\_iter\_\_ 선언 필요
* ## batch_size
    * int, optional, default=1
    * batch 의 크기, tensor의 형태로 데이터가 반환, tensor로 변환이 안되는 데이터는 에러
* ## shuffle
    * bool, optional, default=False
    * 데이터를 섞어 사용할지 결정, 실험 재현을 위해 torch.manual_seed 를 고정하기도 함
* ## sampler
    * sampler, optional
    * index를 컨트롤하는 방법, 사용하기 위해서는 shuffle은 False, map-style에서 컨트롤 할때 사용
        * SequentialSampler : 항상 같은 순서
        * RandomSampler : 랜덤, replacement 여부 선택 가능, 개수 선택 가능
        * SubsetRandomSampler : 랜덤 리스트
        * WeightedRandomSampler : 가중치에 따른 확률
        * BatchSampler : batch 단위로 sampling 가능
        * DistributedSampler : 분산처리
* ## batch_sampler
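    * sampler와 거의 동일하지만 index를 하나씩이 아니라 batch 단위(index 리스트)로 반환, batch_size / shuffle / sampler / drop_last 와 함께 사용할 수 없음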
* ## num_workers
    * int, optional, default=0
    * 데이터 로딩에 사용되는 subprocess 수, default의 경우 main process로 데이터를 불러옴, 병목이 생길 수 있음
* ## collate_fn
    * callable, optional
    * map-style 데이터셋에서 sample list를 batch 단위로 바꾸기 위해 사용
* ## pin_memory
    * bool, optional
    * True 선언시, 데이터로더는 tensor 를 cuda 고정 메모리에 올림
* ## drop_last
    * bool, optional
    * 마지막 남은 batch 를 drop
    * 데이터셋 크기가 batch_size 로 나누어 떨어지지 않아 마지막 batch 의 크기가 달라지는 경우에 유용
* ## timeout
    * numeric, optional, default=0
    * 양수로 주어지는 경우, 데이터로더가 데이터를 불러오는 제한시간
* ## worker_init_fn
    * callable, optional, default=None
    * 각 worker subprocess 가 시작될 때 worker id (0 ~ num_workers-1) 를 인자로 받아 호출되는 함수, worker 별 seed 설정 등 초기화에 사용

In [29]:
class MNIST_MLP(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    A ReLU is placed between consecutive linear layers; without it the stacked
    linear layers collapse into a single affine map. The linear layers keep the
    names ``linear_<idx>`` inside ``self.net``, so parameter keys in the
    state dict are unaffected by the (parameter-free) activations.

    Args:
        name: Label stored on the instance as ``self.name``.
        xdim: Size of each flattened input vector.
        hdim: Width of the hidden layer.
        ydim: Number of output logits.
    """

    def __init__(self, name='mlp', xdim=28*28, hdim=256, ydim=10):
        # nn.Module must be initialized before any sub-module is assigned.
        super(MNIST_MLP, self).__init__()
        self.name = name
        self.xdim = xdim
        self.hdim = hdim
        self.ydim = ydim

        self.lin_1 = nn.Linear(self.xdim, self.hdim)
        self.lin_2 = nn.Linear(self.hdim, self.ydim)
        # Ordered list of the linear layers; init_param() iterates over it.
        self.layers = [self.lin_1, self.lin_2]

        self.net = nn.Sequential()
        last_idx = len(self.layers) - 1
        for l_idx, layer in enumerate(self.layers):
            layer_name = f'{type(layer).__name__.lower()}_{l_idx}'
            self.net.add_module(layer_name, layer)
            # Non-linearity after every layer except the output (logit) layer.
            if l_idx < last_idx:
                self.net.add_module(f'relu_{l_idx}', nn.ReLU())

        self.init_param()  # start from freshly initialized parameters

    def init_param(self):
        """Re-initialize every linear layer: Kaiming-normal weights, zero biases."""
        for layer in self.layers:
            nn.init.kaiming_normal_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the output of ``self.net`` applied to ``x``.

        ``x`` must already have ``xdim`` features in its last dimension.
        """
        return self.net(x)

In [40]:
# Build the classifier, move it to the selected device, and pair it with a
# cross-entropy criterion and an Adam optimizer (learning rate 1e-3).
M = MNIST_MLP(name='mlp', xdim=28*28, hdim=256, ydim=10)
M = M.to(device)
loss = nn.CrossEntropyLoss()
optm = optim.Adam(params=M.parameters(), lr=1e-3)

In [41]:
def func_eval(model, data_iter, device):
    """Compute classification accuracy of ``model`` over ``data_iter``.

    Each input batch is flattened to ``(batch, features)`` before it is fed to
    the model, and the predicted class is the arg-max over dimension 1 of the
    model output.

    Args:
        model: Module to evaluate; it is called on the flattened batches.
        data_iter: Iterable yielding ``(batch_in, batch_out)`` tensor pairs.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of correctly classified samples as a float, or ``nan`` when
        ``data_iter`` yields no samples (accuracy is undefined in that case).
    """
    # Remember the caller's mode so evaluation does not silently flip it.
    was_training = model.training
    model.eval()
    n_total, n_correct = 0, 0
    try:
        with torch.no_grad():
            for batch_in, batch_out in data_iter:
                y_trgt = batch_out.to(device)
                # Flatten everything but the batch dimension (no hard-coded 28*28).
                flat_in = batch_in.flatten(start_dim=1).to(device)
                y_pred = model(flat_in).argmax(dim=1)
                n_correct += (y_pred == y_trgt).sum().item()
                n_total += batch_in.size(0)
    finally:
        # Restore the original train/eval mode even if the loop raised.
        model.train(was_training)
    if n_total == 0:
        return float('nan')
    return n_correct / n_total

In [42]:
# Accuracy of the freshly re-initialized (untrained) model on both splits.
M.init_param()
train_accr, test_accr = (func_eval(M, loader, device) for loader in (train_iter, test_iter))
print(f"train_accr : {round(train_accr, 3)}, test_accr : {round(test_accr, 3)}")

train_accr : 0.101, test_accr : 0.104


In [43]:
print('Training Start')
# Re-initialize the weights so re-running this cell restarts training.
# NOTE: `optm` is created in another cell and keeps its Adam state across
# re-runs of this cell; re-create it there for a completely fresh start.
M.init_param()
M.train()
EPOCHS, print_every = 10, 1
for epoch in range(EPOCHS):
    loss_val_sum = 0.0
    for batch_in, batch_out in train_iter:
        # Forward (call the module rather than .forward() so hooks run)
        y_pred = M(batch_in.view(-1, 28*28).to(device))
        loss_out = loss(y_pred, batch_out.to(device))
        # Update
        optm.zero_grad()
        loss_out.backward()
        optm.step()
        # .item() yields a plain float; summing the tensor itself would keep
        # autograd history alive for the whole epoch.
        loss_val_sum += loss_out.item()
    loss_val_avg = loss_val_sum/len(train_iter)
    # Print
    if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
        train_accr = func_eval(M, train_iter, device)
        test_accr = func_eval(M, test_iter, device)

        print(f"epoch : {epoch}")
        print(f"loss : {loss_val_avg}, train_accr : {round(train_accr, 3)}, test_accr : {round(test_accr, 3)}")
print(f"{'-'*30}")
print('Training End')

Training Start


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (10) must match the size of tensor b (256) at non-singleton dimension 1