In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
# Seed torch's global random number generator so repeated runs start from the same state.
torch.manual_seed(1)

<torch._C.Generator at 0x7fcfecbc1730>

In [2]:
USE_CUDA = torch.cuda.is_available() # use the GPU when CUDA is available

### MNIST 데이터 

In [3]:
# MNIST splits, stored under ../data/ (downloaded on first use) and converted
# to tensors by ToTensor().
train_dataset = vdatasets.MNIST(
    '../data/', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = vdatasets.MNIST(
    '../data/', train=False, transform=transforms.ToTensor(), download=True)

# Mini-batch iterators: 64 samples per batch, reshuffled on every pass,
# loaded by 2 worker processes.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=64, shuffle=True, num_workers=2)

### Model 

In [8]:
class NN(nn.Module):
    """Fully connected ReLU network assembled as a single ``nn.Sequential``.

    The stack is one ``input_size -> hidden_size`` projection, then
    ``num_layers`` ``hidden_size -> hidden_size`` layers, then a final
    ``hidden_size -> output_size`` projection.  Every linear layer except the
    last is followed by a ReLU, so ``forward`` returns the raw output of the
    last linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(NN, self).__init__()

        # (name, module) pairs in forward order; the names become the
        # sub-module keys of ``self.layers``.
        stack = [
            ('input_layer', nn.Linear(input_size, hidden_size)),
            ('activation', nn.ReLU()),
        ]
        for depth in range(1, num_layers + 1):
            stack.append(('hidden_layer_%d' % depth, nn.Linear(hidden_size, hidden_size)))
            stack.append(('activation_%d' % depth, nn.ReLU()))
        stack.append(('output_layer', nn.Linear(hidden_size, output_size)))

        self.layers = nn.Sequential()
        for name, module in stack:
            self.layers.add_module(name, module)

    def forward(self, inputs):
        """Feed ``inputs`` through the layer stack and return its output."""
        return self.layers(inputs)

In [9]:
def evaluation(data_loader,model):
    """Compute accuracy and mean cross-entropy loss of ``model`` over a loader.

    Args:
        data_loader: iterable yielding ``(inputs, targets)`` batches and
            exposing ``.dataset`` (its length is used as the sample count).
        model: module mapping a ``(batch, features)`` tensor to per-class scores.

    Returns:
        ``(accuracy, mean_loss)`` as Python floats, both averaged over
        ``len(data_loader.dataset)``.

    Side effect: the model is switched to eval mode and left there.
    """
    model.eval()
    # Sum (not average) per batch so the final division by the dataset size
    # is exact even when the last batch is smaller.
    loss_function = nn.CrossEntropyLoss(reduction='sum')

    # Evaluate on whichever device the model lives on; otherwise a model moved
    # to the GPU would be fed CPU batches and fail with a device mismatch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    num_equal = 0
    losses = 0.0
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, targets in data_loader:
            # Flatten each sample to a vector (28x28 MNIST images -> 784 columns).
            inputs = inputs.view(inputs.size(0), -1)
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            losses += loss_function(outputs, targets).item()
            predictions = outputs.max(1)[1]  # argmax over the class dimension
            num_equal += torch.eq(predictions, targets).sum().item()

    num_samples = len(data_loader.dataset)
    return num_equal / num_samples, losses / num_samples

In [45]:
# Hyper-parameters for this run.
EPOCH = 30           # passes over the training set
LR = 0.01            # SGD learning rate
HIDDEN_SIZE = 256    # width of every hidden layer
BATCH_SIZE = 64      # NOTE(review): not referenced in the visible cells; the loaders hard-code 64
LAMBDA = 0.0001      # weight-decay coefficient handed to SGD
NUM_LAYERS = 4       # number of hidden_size -> hidden_size layers

# 784 input features (the training loop flattens each batch to 784 columns)
# and 10 output scores.
model = NN(784, HIDDEN_SIZE, 10, NUM_LAYERS)
model = model.cuda() if USE_CUDA else model

loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), weight_decay=LAMBDA, lr=LR)  # weight decay acts as the L2 penalty

In [46]:
# Display the assembled module tree as the cell output.
model

NN(
  (layers): Sequential(
    (input_layer): Linear(in_features=784, out_features=256)
    (activation): ReLU()
    (hidden_layer_1): Linear(in_features=256, out_features=256)
    (activation_1): ReLU()
    (hidden_layer_2): Linear(in_features=256, out_features=256)
    (activation_2): ReLU()
    (hidden_layer_3): Linear(in_features=256, out_features=256)
    (activation_3): ReLU()
    (hidden_layer_4): Linear(in_features=256, out_features=256)
    (activation_4): ReLU()
    (output_layer): Linear(in_features=256, out_features=10)
  )
)

In [47]:
%%time

# TensorBoard event writer for this run; the comment is appended to the run directory name.
writer = SummaryWriter(comment='-weight-decay-baseline')

model.train()
for epoch in range(EPOCH):
    losses=[]
    for i, (inputs, targets) in enumerate(train_loader):
        # Flatten each image to a 784-column row to match the model's input layer.
        inputs = inputs.view(-1,784)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()
        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        # .item() extracts the Python float; indexing a 0-dim loss tensor
        # (loss.data[0]) raises on current PyTorch releases.
        losses.append(loss.item())
        if i % 500 == 0:
            # Report the mean loss since the previous report, then start a new window.
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            losses=[]

# add graph
# add_graph traces the model with an example *input*; the last training batch
# is reused for that (the model's output has the wrong width to be fed back in).
writer.add_graph(model,inputs)
# Flush pending events and release the event file.
writer.close()

# evaluation
train_accuracy, train_loss = evaluation(train_loader,model)
test_accuracy, test_loss =evaluation(test_loader,model)

print("\n\ntrain accuracy : ",train_accuracy)
print("test accuracy : ",test_accuracy)

[0/30] [000/938] mean_loss : 2.302
[0/30] [500/938] mean_loss : 2.301
[1/30] [000/938] mean_loss : 2.296
[1/30] [500/938] mean_loss : 2.298
[2/30] [000/938] mean_loss : 2.290
[2/30] [500/938] mean_loss : 2.288
[3/30] [000/938] mean_loss : 2.239
[3/30] [500/938] mean_loss : 2.144
[4/30] [000/938] mean_loss : 1.169
[4/30] [500/938] mean_loss : 0.910
[5/30] [000/938] mean_loss : 0.422
[5/30] [500/938] mean_loss : 0.547
[6/30] [000/938] mean_loss : 0.340
[6/30] [500/938] mean_loss : 0.381
[7/30] [000/938] mean_loss : 0.400
[7/30] [500/938] mean_loss : 0.304
[8/30] [000/938] mean_loss : 0.174
[8/30] [500/938] mean_loss : 0.241
[9/30] [000/938] mean_loss : 0.202
[9/30] [500/938] mean_loss : 0.200
[10/30] [000/938] mean_loss : 0.114
[10/30] [500/938] mean_loss : 0.168
[11/30] [000/938] mean_loss : 0.309
[11/30] [500/938] mean_loss : 0.151
[12/30] [000/938] mean_loss : 0.150
[12/30] [500/938] mean_loss : 0.133
[13/30] [000/938] mean_loss : 0.027
[13/30] [500/938] mean_loss : 0.115
[14/30] [000

In [30]:
!tensorboard --logdir runs

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C
