In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
torch.manual_seed(1)

<torch._C.Generator at 0x7ff6a8071730>

In [2]:
# Flag read by later cells to decide whether the model and the training batches are moved to the GPU.
USE_CUDA = torch.cuda.is_available() # use the GPU when one is available

### MNIST 데이터

In [5]:
# Mini-batch size passed to both DataLoaders built in the next cell.
BATCH_SIZE = 64

In [6]:
# Options common to both MNIST splits.
mnist_options = dict(root='../../data/MNIST/',
                     transform=transforms.ToTensor(),  # images are converted to tensors on access
                     download=True)                    # fetch the files if they are not on disk yet

# Options common to both loaders.
loader_options = dict(batch_size=BATCH_SIZE,
                      shuffle=True,
                      num_workers=2)

# Training split.
train_dataset = vdatasets.MNIST(train=True, **mnist_options)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, **loader_options)

# Test split.
test_dataset = vdatasets.MNIST(train=False, **mnist_options)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, **loader_options)

### Model 

In [26]:
class NN(nn.Module):
    """Fully connected classifier: two hidden ReLU layers, each followed by dropout.

    Args:
        hidden_size: width of both hidden layers.
        dropout_p: probability handed to the dropout applied after each
            hidden layer.
        input_size: number of features per example. Defaults to 784, the
            size the notebook flattens every image to.
        num_classes: number of scores produced per example. Defaults to 10.
    """

    def __init__(self, hidden_size, dropout_p=0.5, input_size=784, num_classes=10):
        super(NN, self).__init__()
        # Layers are created in a fixed order so seeded initialisation is
        # reproducible.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)

        # A single Dropout module is reused after both hidden layers. It only
        # drops activations in train() mode; after eval() it is the identity.
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, inputs):
        """Map a ``(batch, input_size)`` tensor to ``(batch, num_classes)`` scores.

        The caller is responsible for flattening the inputs beforehand.
        """
        hidden = self.dropout(F.relu(self.l1(inputs)))
        hidden = self.dropout(F.relu(self.l2(hidden)))
        # Raw scores are returned (no softmax); nn.CrossEntropyLoss applies
        # log-softmax itself.
        return self.l3(hidden)
    
# Demo instance for showing the two module modes; `model` is rebound to a
# fresh network before training.
model = NN(hidden_size=512)

# Training process: train() puts the module (and its dropout layer) in training mode.
model.train()

# Test (actual inference) process: eval() puts the module in evaluation mode.
# Kept as the last expression so the notebook displays the module summary.
model.eval()

NN(
  (l1): Linear(in_features=784, out_features=1024)
  (l2): Linear(in_features=1024, out_features=1024)
  (l3): Linear(in_features=1024, out_features=10)
  (dropout): Dropout(p=0.5)
)

In [42]:
def evaluation(data_loader,model):
    """Compute accuracy and mean cross-entropy loss of ``model`` over a loader.

    Args:
        data_loader: iterable of ``(inputs, targets)`` batches that also
            exposes ``.dataset``; ``len(data_loader.dataset)`` is used as the
            number of examples. Each input batch is reshaped to ``(-1, 784)``.
        model: module mapping a ``(batch, 784)`` tensor to per-class scores.

    Returns:
        ``(accuracy, mean_loss)``: the fraction of correctly classified
        examples and the summed cross-entropy divided by the dataset size.

    Note:
        The model is left in eval mode; call ``model.train()`` before
        resuming training.
    """
    model.eval() # for dropout at test time!
    # Sum the loss per batch (instead of averaging) so that dividing by the
    # dataset size stays exact even when the last batch is smaller.
    loss_function = nn.CrossEntropyLoss(reduction='sum')

    # Evaluate on whatever device the model lives on; feeding CPU batches to a
    # model that was moved to the GPU raises a device-mismatch error.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    num_equal = 0
    losses = 0.0
    with torch.no_grad(): # no gradients are needed, so skip building the graph
        for inputs, targets in data_loader:
            inputs = inputs.view(-1, 784)
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            # .item() turns the 0-dim result into a Python number
            # (indexing it with .data[0] fails on current PyTorch).
            losses += loss_function(outputs, targets).item()
            predictions = outputs.max(1)[1] # argmax
            num_equal += torch.eq(predictions, targets).sum().item()
    return num_equal/len(data_loader.dataset), losses/len(data_loader.dataset)

In [72]:
# --- Hyperparameters ---
EPOCH = 15          # number of passes over the training set
LR = 0.1            # learning rate given to SGD
HIDDEN_SIZE = 256   # width of both hidden layers
BATCH_SIZE = 64     # NOTE(review): the DataLoaders were already built from the earlier BATCH_SIZE
NUM_LAYERS = 1      # NOTE(review): not read by any code visible in this notebook
DROPOUT = 0.5       # dropout probability

# --- Model, loss and optimizer ---
model = NN(hidden_size=HIDDEN_SIZE, dropout_p=DROPOUT)
if USE_CUDA:
    # nn.Module.cuda() moves the parameters in place and returns the module itself.
    model.cuda()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

In [73]:
model

NN(
  (l1): Linear(in_features=784, out_features=256)
  (l2): Linear(in_features=256, out_features=256)
  (l3): Linear(in_features=256, out_features=10)
  (dropout): Dropout(p=0.5)
)

In [74]:
%%time
model.train()
for epoch in range(EPOCH):
    losses=[]
    for i, (inputs, targets) in enumerate(train_loader):
        inputs, targets = Variable(inputs).view(-1,784), Variable(targets)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()
        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        losses.append(loss.data[0])
        if i % 500 == 0:
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            losses=[]

# Score the trained network on both splits.
train_accuracy, train_loss = evaluation(train_loader, model)
test_accuracy, test_loss = evaluation(test_loader, model)

# The leading newlines separate the summary from the training log above.
for label, accuracy in (("\n\ntrain accuracy : ", train_accuracy),
                        ("test accuracy : ", test_accuracy)):
    print(label, accuracy)

[0/15] [000/938] mean_loss : 2.294
[0/15] [500/938] mean_loss : 0.824
[1/15] [000/938] mean_loss : 0.270
[1/15] [500/938] mean_loss : 0.278
[2/15] [000/938] mean_loss : 0.267
[2/15] [500/938] mean_loss : 0.216
[3/15] [000/938] mean_loss : 0.186
[3/15] [500/938] mean_loss : 0.173
[4/15] [000/938] mean_loss : 0.115
[4/15] [500/938] mean_loss : 0.155
[5/15] [000/938] mean_loss : 0.226
[5/15] [500/938] mean_loss : 0.141
[6/15] [000/938] mean_loss : 0.069
[6/15] [500/938] mean_loss : 0.128
[7/15] [000/938] mean_loss : 0.303
[7/15] [500/938] mean_loss : 0.121
[8/15] [000/938] mean_loss : 0.070
[8/15] [500/938] mean_loss : 0.114
[9/15] [000/938] mean_loss : 0.121
[9/15] [500/938] mean_loss : 0.105
[10/15] [000/938] mean_loss : 0.063
[10/15] [500/938] mean_loss : 0.105
[11/15] [000/938] mean_loss : 0.133
[11/15] [500/938] mean_loss : 0.097
[12/15] [000/938] mean_loss : 0.073
[12/15] [500/938] mean_loss : 0.093
[13/15] [000/938] mean_loss : 0.055
[13/15] [500/938] mean_loss : 0.086
[14/15] [000

### TODO 

* Dropout을 사용했을 때와 안했을 때의 차이 비교해보기 (train/test error tradeoff)