In [34]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets
import torchvision.utils as vutils
from tensorboardX import SummaryWriter
# Seed PyTorch's global random number generator with a fixed value (1).
torch.manual_seed(1)

<torch._C.Generator at 0x7ff1481d3cb0>

In [2]:
USE_CUDA = torch.cuda.is_available() # use the GPU when CUDA is available

### MNIST data

In [3]:
# MNIST train/test splits; images are converted to tensors by ToTensor().
train_dataset = vdatasets.MNIST(
    root='../data/', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = vdatasets.MNIST(
    root='../data/', train=False, transform=transforms.ToTensor(), download=True)

# Mini-batch iterators over both splits (64 samples per batch, 2 worker processes).
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=64, shuffle=True, num_workers=2)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


### Model 

In [15]:
class NN(nn.Module):
    """Fully connected ReLU network.

    The stack is: ``input_layer`` (input_size -> hidden_size) + ReLU,
    then ``num_layers`` blocks of ``hidden_layer_k`` (hidden_size ->
    hidden_size) + ReLU, then ``output_layer`` (hidden_size -> output_size)
    with no activation after it.
    """

    def __init__(self,input_size,hidden_size,output_size,num_layers=1):
        super(NN,self).__init__()

        # Collect (name, module) pairs first, in the order they are applied.
        layer_specs = [
            ('input_layer', nn.Linear(input_size, hidden_size)),
            ('activation', nn.ReLU()),
        ]
        for depth in range(1, num_layers + 1):
            layer_specs.append(
                ('hidden_layer_' + str(depth), nn.Linear(hidden_size, hidden_size)))
            layer_specs.append(('activation_' + str(depth), nn.ReLU()))
        layer_specs.append(('output_layer', nn.Linear(hidden_size, output_size)))

        # Register every module on the sequential container under its name.
        self.layers = nn.Sequential()
        for layer_name, layer in layer_specs:
            self.layers.add_module(layer_name, layer)

    def forward(self,inputs):
        """Run ``inputs`` through the whole stack and return the raw outputs."""
        outputs = self.layers(inputs)
        return outputs

In [16]:
def evaluation(data_loader,model):
    """Compute accuracy and mean cross-entropy loss of ``model`` on a data set.

    Args:
        data_loader: iterable of ``(inputs, targets)`` batches that also
            exposes ``.dataset`` (its length is used as the sample count).
            Each input batch is flattened with ``view(-1, 784)``.
        model: ``nn.Module`` mapping a ``(batch, 784)`` tensor to class scores.

    Returns:
        ``(accuracy, mean_loss)`` as Python floats, both divided by
        ``len(data_loader.dataset)``.

    Notes:
        Batches are moved to the device of the model's first parameter, so the
        function works whether or not the model was moved with ``.cuda()``.
        The model is switched to eval mode for the duration of the call and
        its previous train/eval mode is restored afterwards.
    """
    was_training = model.training
    model.eval()

    # Summed (not averaged) per-batch loss, so dividing by the data set size
    # at the end gives the exact per-sample mean even for a short last batch.
    loss_function = nn.CrossEntropyLoss(reduction='sum')

    # Follow the model's device instead of assuming CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    num_equal = 0
    losses = 0.0
    try:
        with torch.no_grad():  # no autograd graph is needed for evaluation
            for inputs, targets in data_loader:
                inputs = inputs.view(-1, 784)
                if device is not None:
                    inputs = inputs.to(device)
                    targets = targets.to(device)
                outputs = model(inputs)
                losses += loss_function(outputs, targets).item()
                predictions = outputs.max(1)[1]  # argmax over the class dimension
                num_equal += torch.eq(predictions, targets).sum().item()
    finally:
        model.train(was_training)

    num_samples = len(data_loader.dataset)
    return num_equal / num_samples, losses / num_samples

In [35]:
# Optimisation hyper-parameters.
EPOCH = 5
LR = 0.01
BATCH_SIZE = 64
LAMBDA = 0.1  # weight-decay coefficient; only referenced by the disabled option below

# Network shape.
HIDDEN_SIZE = 1024
NUM_LAYERS = 2

# 784 inputs and 10 output classes.
model = NN(input_size=784, hidden_size=HIDDEN_SIZE, output_size=10, num_layers=NUM_LAYERS)
if USE_CUDA:
    model = model.cuda()

loss_function = nn.CrossEntropyLoss()
# Plain SGD; passing weight_decay=LAMBDA (L2 norm penalty) is currently switched off.
optimizer = optim.SGD(params=model.parameters(), lr=LR)

In [36]:
# Display the module tree of the network built above.
model

NN(
  (layers): Sequential(
    (input_layer): Linear(in_features=784, out_features=1024)
    (activation): ReLU()
    (hidden_layer_1): Linear(in_features=1024, out_features=1024)
    (activation_1): ReLU()
    (hidden_layer_2): Linear(in_features=1024, out_features=1024)
    (activation_2): ReLU()
    (output_layer): Linear(in_features=1024, out_features=10)
  )
)

In [37]:
%%time

# Train for EPOCH passes over train_loader, logging the mean loss of the
# batches seen since the previous log line every 100 iterations.
model.train()
for epoch in range(EPOCH):
    losses = []  # batch losses accumulated since the last log line
    for i, (inputs, targets) in enumerate(train_loader):
        # Flatten each image to a 784-dimensional vector.
        inputs = inputs.view(-1, 784)
        if USE_CUDA:
            inputs = inputs.cuda()
            targets = targets.cuda()
        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        # .item() extracts the Python float from the scalar loss tensor;
        # indexing it with .data[0] raises on PyTorch >= 0.5.
        losses.append(loss.item())
        if i % 100 == 0:
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            losses = []

# evaluation
train_accuracy, train_loss = evaluation(train_loader, model)
test_accuracy, test_loss = evaluation(test_loader, model)

print("\n\ntrain accuracy : ",train_accuracy)
print("test accuracy : ",test_accuracy)

[0/5] [000/938] mean_loss : 2.304
[0/5] [100/938] mean_loss : 2.296
[0/5] [200/938] mean_loss : 2.279
[0/5] [300/938] mean_loss : 2.258
[0/5] [400/938] mean_loss : 2.226
[0/5] [500/938] mean_loss : 2.168
[0/5] [600/938] mean_loss : 2.060
[0/5] [700/938] mean_loss : 1.857
[0/5] [800/938] mean_loss : 1.568
[0/5] [900/938] mean_loss : 1.243
[1/5] [000/938] mean_loss : 0.853
[1/5] [100/938] mean_loss : 0.922
[1/5] [200/938] mean_loss : 0.791
[1/5] [300/938] mean_loss : 0.681
[1/5] [400/938] mean_loss : 0.613
[1/5] [500/938] mean_loss : 0.558
[1/5] [600/938] mean_loss : 0.518
[1/5] [700/938] mean_loss : 0.473
[1/5] [800/938] mean_loss : 0.477
[1/5] [900/938] mean_loss : 0.448
[2/5] [000/938] mean_loss : 0.272
[2/5] [100/938] mean_loss : 0.423
[2/5] [200/938] mean_loss : 0.410
[2/5] [300/938] mean_loss : 0.392
[2/5] [400/938] mean_loss : 0.387
[2/5] [500/938] mean_loss : 0.359
[2/5] [600/938] mean_loss : 0.358
[2/5] [700/938] mean_loss : 0.368
[2/5] [800/938] mean_loss : 0.389
[2/5] [900/938

In [30]:
!tensorboard --logdir runs

TensorBoard 0.4.0rc3 at http://dsksd-tf:6006 (Press CTRL+C to quit)
^C


### weight decay 

$$H(p,q) = -\sum_x p(x)\,\log q(x)$$

In [45]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F

z = torch.Tensor([2.0,1.0,0.1])

def softmax(z):
    """Softmax over all elements of ``z``.

    The maximum of ``z`` is subtracted before exponentiating. This leaves the
    result mathematically unchanged (the common factor cancels in the ratio)
    but keeps ``torch.exp`` from overflowing to ``inf`` for large scores,
    which would otherwise turn the output into ``nan``.

    Args:
        z: tensor of scores.

    Returns:
        Tensor of the same shape as ``z`` whose elements sum to 1.
    """
    shifted = z - torch.max(z)
    exps = torch.exp(shifted)
    return exps / torch.sum(exps)

# Reference result from the built-in softmax along dimension 0.
res = F.softmax(Variable(z),0)
print(res)             # hand-written equivalent: print(softmax(z))
print(torch.sum(res))  # hand-written equivalent: print(torch.sum(softmax(z)))

Variable containing:
 0.6590
 0.2424
 0.0986
[torch.FloatTensor of size 3]

Variable containing:
 1.0000
[torch.FloatTensor of size 1]



In [30]:
import torch
from torch.autograd import Variable
import torch.nn.functional as F

q = torch.Tensor([0.7,0.2,0.1])
p = torch.Tensor([1.0,0.,0.])

def cross_entropy(p,q):
    """Cross entropy ``H(p, q) = -sum_x p(x) * log(q(x))``.

    Only entries with ``p(x) > 0`` contribute, following the convention
    ``0 * log(0) = 0``. Without the mask, a zero in ``q`` at a position where
    ``p`` is also zero evaluates ``0 * -inf`` and makes the whole sum ``nan``.
    A zero in ``q`` where ``p`` is positive still yields ``inf``.

    Args:
        p: tensor holding the target distribution.
        q: tensor holding the predicted distribution, same shape as ``p``.

    Returns:
        The cross entropy as returned by ``torch.sum``.
    """
    support = p > 0
    return -torch.sum(p[support] * torch.log(q[support]))

cross_entropy(p,q)

0.3566749691963196