In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
torch.manual_seed(1)

<torch._C.Generator at 0x7f8a20888750>

In [3]:
USE_CUDA = torch.cuda.is_available() # True when a CUDA GPU is available; later cells use it to move the model and batches to the GPU

### MNIST 데이터 

In [10]:
# MNIST train/test splits, downloaded into ../data/ when missing and converted
# to tensors by ToTensor. Both loaders use the same settings: mini-batches of
# 64 samples, reshuffled on every pass, loaded by 2 worker processes.
_mnist_kwargs = dict(root='../data/', transform=transforms.ToTensor(), download=True)
_loader_kwargs = dict(batch_size=64, shuffle=True, num_workers=2)

train_dataset = vdatasets.MNIST(train=True, **_mnist_kwargs)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, **_loader_kwargs)

test_dataset = vdatasets.MNIST(train=False, **_mnist_kwargs)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, **_loader_kwargs)

### Baseline 

In [None]:
class NN(nn.Module):
    """Empty network skeleton.

    The constructor accepts the layer sizes but builds no layers, and
    ``forward`` returns ``None``. A full class with the same name is defined
    in a later cell and replaces this one once that cell runs.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

    def forward(self, inputs):
        # Placeholder: no computation is performed.
        return None

In [48]:
# Define a somewhat deeper model for the experiments

class NN(nn.Module):
    """Five-layer fully connected network with ReLU between layers.

    Layer widths: input_size -> hidden_size -> hidden_size -> hidden_size//2
    -> hidden_size//4 -> output_size. The last layer's output is returned
    as-is (no activation is applied to it).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        half = hidden_size // 2
        quarter = hidden_size // 4

        # Layers are created in l1..l5 order so parameter initialisation
        # consumes the random generator in the same sequence as before.
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, half)
        self.l4 = nn.Linear(half, quarter)
        self.l5 = nn.Linear(quarter, output_size)

    def forward(self, inputs):
        """Apply l1..l4, each followed by ReLU, then return l5's output."""
        hidden = inputs
        for layer in (self.l1, self.l2, self.l3, self.l4):
            hidden = F.relu(layer(hidden))
        return self.l5(hidden)

In [49]:
def evaluation(data_loader,model):
    """Measure classification accuracy and mean cross-entropy loss over a loader.

    Args:
        data_loader: iterable of ``(inputs, targets)`` batches that also exposes
            a sized ``dataset`` attribute (used as the denominator).
        model: module mapping a ``(batch, features)`` tensor to class scores.

    Returns:
        ``(accuracy, mean_loss)`` as Python numbers: the fraction of samples
        whose arg-max prediction equals the target, and the summed
        cross-entropy divided by ``len(data_loader.dataset)``.

    Raises:
        ZeroDivisionError: if ``data_loader.dataset`` is empty.

    Side effect: the model is switched to eval mode and left there; call
    ``model.train()`` before resuming training.
    """
    model.eval()

    # Run on whatever device the model lives on. Previously batches stayed on
    # the CPU, which fails once the model has been moved to the GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # Sum (not average) per batch so a smaller final batch is weighted correctly.
    loss_function = nn.CrossEntropyLoss(reduction='sum')
    num_equal = 0
    losses = 0.0

    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for inputs, targets in data_loader:
            # Flatten every sample to one feature vector
            # (e.g. 1x28x28 -> 784 for the MNIST batches used in this notebook).
            inputs = inputs.reshape(inputs.size(0), -1).to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            # .item() yields a Python number; indexing a 0-dim tensor with [0]
            # raises on PyTorch >= 0.5.
            losses += loss_function(outputs, targets).item()
            predictions = outputs.max(1)[1]  # argmax over the class dimension
            num_equal += torch.eq(predictions, targets).sum().item()

    num_samples = len(data_loader.dataset)
    return num_equal/num_samples, losses/num_samples

In [52]:
# Training hyperparameters
EPOCH = 3        # number of passes over the training loader
LR = 0.01        # SGD learning rate
BATCH_SIZE = 64  # NOTE(review): not read by any visible cell; the loaders hard-code batch_size=64
LAMBDA = 0.1     # weight-decay coefficient passed to SGD

loss_function = nn.CrossEntropyLoss()

model = NN(input_size=784, hidden_size=512, output_size=10)
if USE_CUDA:
    model = model.cuda()

# weight_decay adds the L2 penalty to every parameter update
optimizer = optim.SGD(model.parameters(), weight_decay=LAMBDA, lr=LR)

In [53]:
# Show the module's repr to inspect the layer stack and sizes.
model

NN(
  (l1): Linear(in_features=784, out_features=512)
  (l2): Linear(in_features=512, out_features=512)
  (l3): Linear(in_features=512, out_features=256)
  (l4): Linear(in_features=256, out_features=128)
  (l5): Linear(in_features=128, out_features=10)
)

In [56]:
# NOTE(review): l1_norm is initialised here but never read in the visible cells;
# presumably scratch for an L1-penalty experiment. Confirm or remove.
l1_norm = 0

In [67]:
# Sum of squared entries of `param`.
# NOTE(review): `param` is not defined in any visible cell; presumably left over
# from a loop over model.parameters(). Define it explicitly before this cell.
param.data.pow(2).sum()

0.2642144385026768

In [68]:
# 2-norm of `param`: the square root of the sum of its squared entries.
# NOTE(review): `param` is not defined in any visible cell; presumably left over
# from a loop over model.parameters(). Define it explicitly before this cell.
param.data.norm()

0.5140179375994147

In [55]:
%%time

model.train()
for epoch in range(EPOCH):
    losses=[]
    for i, (inputs, targets) in enumerate(train_loader):
        inputs, targets = Variable(inputs).view(-1,784), Variable(targets)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()
        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        losses.append(loss.data[0])
        if i % 100 == 0:
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            losses=[]
            
# evaluation
train_accuracy, train_loss = evaluation(train_loader,model)
test_accuracy, test_loss =evaluation(test_loader,model)

print("\n\ntrain accuracy : ",train_accuracy)
print("test accuracy : ",test_accuracy)

[0/3] [000/938] mean_loss : 0.678
[0/3] [100/938] mean_loss : 0.563
[0/3] [200/938] mean_loss : 0.560
[0/3] [300/938] mean_loss : 0.535
[0/3] [400/938] mean_loss : 0.535
[0/3] [500/938] mean_loss : 0.497
[0/3] [600/938] mean_loss : 0.487
[0/3] [700/938] mean_loss : 0.493
[0/3] [800/938] mean_loss : 0.463
[0/3] [900/938] mean_loss : 0.448
[1/3] [000/938] mean_loss : 0.340
[1/3] [100/938] mean_loss : 0.441
[1/3] [200/938] mean_loss : 0.450
[1/3] [300/938] mean_loss : 0.393
[1/3] [400/938] mean_loss : 0.388
[1/3] [500/938] mean_loss : 0.381
[1/3] [600/938] mean_loss : 0.377
[1/3] [700/938] mean_loss : 0.390
[1/3] [800/938] mean_loss : 0.359
[1/3] [900/938] mean_loss : 0.346
[2/3] [000/938] mean_loss : 0.297
[2/3] [100/938] mean_loss : 0.363
[2/3] [200/938] mean_loss : 0.335
[2/3] [300/938] mean_loss : 0.326
[2/3] [400/938] mean_loss : 0.337
[2/3] [500/938] mean_loss : 0.302
[2/3] [600/938] mean_loss : 0.312
[2/3] [700/938] mean_loss : 0.303
[2/3] [800/938] mean_loss : 0.276
[2/3] [900/938

In [30]:
# Launch TensorBoard on the `runs` log directory. The command keeps the cell
# busy until it is interrupted (the recorded output ends with ^C).
# NOTE(review): no visible cell writes logs to `runs`; confirm a SummaryWriter is used elsewhere.
!tensorboard --logdir runs

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C


### weight decay 