In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vdatasets

torch.manual_seed(1)
import matplotlib.pyplot as plt
%matplotlib inline

### Softmax 

$$softmax(x)_i = \frac{\exp(x_i)}{\sum_j \exp(x_j)}$$

In [6]:
# Three example scores (logits) that the softmax demos in this cell normalize.
z = torch.Tensor([2.0, 1.0, 0.1])

def softmax(z):
    """Normalize a tensor of scores into probabilities that sum to 1.

    The normalization runs over *all* elements of ``z`` (there is no
    per-dimension handling), matching the original implementation.

    Args:
        z: tensor of raw scores (logits).

    Returns:
        Tensor with the same shape as ``z`` holding
        ``exp(z_i) / sum_j exp(z_j)``.
    """
    # Softmax is invariant to adding a constant to every score. Shifting by the
    # maximum makes the largest exponent exactly 0, so exp() cannot overflow to
    # inf (which would otherwise turn the result into inf / inf = NaN).
    shift = torch.max(z) if z.numel() > 0 else 0
    exps = torch.exp(z - shift)
    return exps / torch.sum(exps)

# Hand-written softmax: show the probabilities and check that they add up to 1.
manual_probs = softmax(z)
print(manual_probs)
print(torch.sum(manual_probs))
print("")

# Same computation with PyTorch's built-in softmax along dimension 0.
res = F.softmax(Variable(z), dim=0)
print(res)
print(torch.sum(res))


 0.6590
 0.2424
 0.0986
[torch.FloatTensor of size 3]

0.9999999552965164

Variable containing:
 0.6590
 0.2424
 0.0986
[torch.FloatTensor of size 3]

Variable containing:
 1.0000
[torch.FloatTensor of size 1]



### Cross entropy 

$$H(p,q) = -\sum_x p(x) \log q(x)$$

In [33]:
# Two toy predicted distributions over 3 classes. Each must sum to 1 to be a
# valid probability distribution (the previous predict_2 summed to 1.9).
predict_1 = torch.Tensor([0.7, 0.2, 0.1])    # most mass on class 0
predict_2 = torch.Tensor([0.01, 0.9, 0.09])  # almost no mass on class 0

# One-hot target distribution: the true class is class 0.
target = torch.Tensor([1.0, 0., 0.])

def cross_entropy(q,p):
    """Cross entropy H(p, q) = -sum_x p(x) * log(q(x)).

    Args:
        q: tensor of predicted probabilities.
        p: tensor of target probabilities (e.g. a one-hot vector).

    Returns:
        The summed cross entropy over all elements.
    """
    terms = p * torch.log(q)
    # Where p is exactly 0 the term contributes nothing by the usual
    # 0 * log(0) = 0 convention. Computed naively it is 0 * -inf = NaN when q
    # is also 0, which would poison the whole sum, so zero it out explicitly.
    terms = torch.where(p != 0, terms, torch.zeros_like(terms))
    return -torch.sum(terms)

# Correct prediction -> low loss (cost)
print(cross_entropy(q=predict_1, p=target))
# Wrong prediction -> high loss (cost)
print(cross_entropy(q=predict_2, p=target))

# In practice: nn.CrossEntropyLoss takes raw scores (logits) plus the index of
# the true class, instead of probabilities and a one-hot vector.

predict_1 = torch.Tensor([2., 1., 0.1])  # highest score on class 0
predict_2 = torch.Tensor([1., 2., 0.1])  # highest score on class 1
target = torch.LongTensor([0])           # true class index

loss_function = nn.CrossEntropyLoss()  # includes the softmax operation
# The loss expects a (batch, classes) input, hence the reshape to one row.
loss1 = loss_function(predict_1.view(1, -1), target)
loss2 = loss_function(predict_2.view(1, -1), target)

# The loss is a 0-dim tensor; .item() extracts the Python number
# (indexing it with .data[0] raises IndexError on current PyTorch).
print(loss1.item())
print(loss2.item())

0.3566749691963196
4.605170249938965
0.41703000664711
1.4170299768447876


### MNIST dataset

In [2]:
# MNIST training split, fetched into ../data/ if it is not already there;
# every image is converted to a tensor by ToTensor().
train_dataset = vdatasets.MNIST(
    '../data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Shuffled mini-batches of 64 training samples, loaded by 2 worker processes.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)

# MNIST test split, stored under the same root with the same transform.
test_dataset = vdatasets.MNIST(
    '../data/',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Test batches use the same loader settings as training (including shuffling).
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)

### 모델링

In [3]:
# Nonlinearity applied after each hidden layer of NN.
# Alternatives to try: F.sigmoid, F.tanh, ...
ACTIVATION_FUNCTION = F.relu

In [4]:
class NN(nn.Module):
    """Fully connected classifier with two hidden layers of equal width.

    Args:
        input_size: number of features in each input row.
        hidden_size: width of both hidden layers.
        output_size: number of output scores (one per class).
        activation: optional callable applied after each hidden layer. When
            omitted, the module-level ``ACTIVATION_FUNCTION`` is looked up at
            call time, which is the original behavior.
    """

    def __init__(self, input_size, hidden_size, output_size, activation=None):
        super().__init__()

        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, output_size)
        self.activation = activation

    def forward(self, inputs):
        """Return the raw class scores (logits) for a batch of inputs.

        No softmax is applied here; the output of the last linear layer is
        returned as-is.
        """
        # Resolve the fallback on every call so that rebinding the global
        # ACTIVATION_FUNCTION still takes effect for models built without an
        # explicit activation.
        activation = self.activation if self.activation is not None else ACTIVATION_FUNCTION
        hidden = activation(self.l1(inputs))
        hidden = activation(self.l2(hidden))
        return self.l3(hidden)

In [5]:
# Training hyperparameters.
EPOCH = 3        # number of passes over train_loader
LR = 0.01        # learning rate handed to SGD
BATCH_SIZE = 64  # NOTE(review): not referenced by the visible code; the DataLoaders use a literal batch_size=64

# 784 input features (batches are reshaped with view(-1, 784) before the
# forward pass), 1024 hidden units, 10 output scores.
model = NN(input_size=784, hidden_size=1024, output_size=10)
loss_function = nn.CrossEntropyLoss()  # includes the softmax function
optimizer = optim.SGD(model.parameters(), lr=LR)

$$loss(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right) = -x[class] + \log\left(\sum_j \exp(x[j])\right)$$

### 트레이닝 

In [6]:
# Plain SGD training: EPOCH passes over train_loader, logging the mean loss
# of the batches seen since the previous log line.
for epoch in range(EPOCH):
    losses = []
    for i, (inputs, targets) in enumerate(train_loader):
        # Reshape each batch to rows of 784 values, the input size of the model.
        inputs = inputs.view(-1, 784)

        # Clear gradients left over from the previous step before backprop.
        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        # The loss is a 0-dim tensor; .item() extracts the Python float
        # (indexing it with .data[0] raises IndexError on current PyTorch).
        losses.append(loss.item())
        if i % 100 == 0:
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (epoch,EPOCH,i,len(train_loader),np.mean(losses)))
            # Start a fresh window for the next 100 batches.
            losses = []

[0/3] [000/937] mean_loss : 2.306
[0/3] [100/937] mean_loss : 2.267
[0/3] [200/937] mean_loss : 2.183
[0/3] [300/937] mean_loss : 2.042
[0/3] [400/937] mean_loss : 1.800
[0/3] [500/937] mean_loss : 1.464
[0/3] [600/937] mean_loss : 1.156
[0/3] [700/937] mean_loss : 0.921
[0/3] [800/937] mean_loss : 0.772
[0/3] [900/937] mean_loss : 0.678
[1/3] [000/937] mean_loss : 0.582
[1/3] [100/937] mean_loss : 0.573
[1/3] [200/937] mean_loss : 0.546
[1/3] [300/937] mean_loss : 0.505
[1/3] [400/937] mean_loss : 0.483
[1/3] [500/937] mean_loss : 0.468
[1/3] [600/937] mean_loss : 0.459
[1/3] [700/937] mean_loss : 0.442
[1/3] [800/937] mean_loss : 0.408
[1/3] [900/937] mean_loss : 0.386
[2/3] [000/937] mean_loss : 0.458
[2/3] [100/937] mean_loss : 0.400
[2/3] [200/937] mean_loss : 0.370
[2/3] [300/937] mean_loss : 0.361
[2/3] [400/937] mean_loss : 0.341
[2/3] [500/937] mean_loss : 0.367
[2/3] [600/937] mean_loss : 0.360
[2/3] [700/937] mean_loss : 0.341
[2/3] [800/937] mean_loss : 0.342
[2/3] [900/937

### 테스트 

In [21]:
# Count correct predictions over the whole test set.
num_equal = 0
# Gradients are not needed for evaluation; no_grad() skips building the
# autograd graph for these forward passes.
with torch.no_grad():
    for inputs, targets in test_loader:
        outputs = model(inputs.view(-1, 784))

        # max(1) returns (values, indices) along the class dimension; the
        # indices are the argmax, i.e. the predicted class per sample.
        predicted = outputs.max(1)[1]
        # .item() turns the 0-dim count tensor into a Python int
        # (indexing it with .data[0] raises IndexError on current PyTorch).
        num_equal += torch.eq(predicted, targets).sum().item()


print("Accuracy : ", num_equal / len(test_dataset))

Accuracy :  0.9081


### TODO 

* 네트워크 구조 바꿔보기(# of layers, # of hidden nodes, activation function)