In [3]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision.transforms as transforms
import torchvision.datasets as vision_datasets

torch.manual_seed(1)
import matplotlib.pyplot as plt
%matplotlib inline

### Load MNIST dataset

In [12]:
# MNIST training split stored under ../data/ (download=True asks torchvision
# to fetch it when needed); every image is converted with ToTensor.
train_dataset = vision_datasets.MNIST(
    root='../data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Shuffled mini-batches of 64 samples, loaded with 2 DataLoader workers.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)

### 모델링

In [8]:
# Nonlinearity that NN.forward applies after each hidden layer.
# Other candidates to experiment with: F.sigmoid, F.tanh, ...
ACTIVATION_FUNCTION = F.relu

In [21]:
class NN(nn.Module):
    """Fully connected network with two hidden layers of equal width.

    Layout: input_size -> hidden_size -> hidden_size -> output_size.
    The last layer's output is returned without an activation.
    """

    def __init__(self, input_size, hidden_size, output_size, activation=None):
        """Build the three linear layers.

        Args:
            input_size: number of features in each input row.
            hidden_size: width of both hidden layers.
            output_size: number of values produced per input row.
            activation: optional callable applied after each hidden layer.
                When left as None, the module-level ACTIVATION_FUNCTION is
                looked up on every forward call (the original behavior).
        """
        super(NN, self).__init__()

        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, output_size)
        self.activation = activation

    def forward(self, inputs):
        """Run inputs through l1 and l2 (each followed by the activation), then l3."""
        # Resolve at call time so that, without an explicit activation, later
        # changes to the global ACTIVATION_FUNCTION still take effect.
        activation = self.activation if self.activation is not None else ACTIVATION_FUNCTION
        hidden = activation(self.l1(inputs))
        hidden = activation(self.l2(hidden))
        return self.l3(hidden)

In [28]:
# Training hyperparameters.
STEP = 3         # number of passes over train_loader
LR = 0.01        # learning rate handed to SGD
BATCH_SIZE = 64  # only used for the progress display; keep in sync with the DataLoader's batch_size

# 784 input features, two hidden layers of 1024 units, 10 outputs.
model = NN(784, 1024, 10)
# CrossEntropyLoss already includes the softmax function.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LR)

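$$\begin{aligned}
\mathrm{loss}(x, \mathrm{class}) &= -\log\left(\frac{\exp(x[\mathrm{class}])}{\sum_j \exp(x[j])}\right) \\
&= -x[\mathrm{class}] + \log\left(\sum_j \exp(x[j])\right)
\end{aligned}$$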

### 트레이닝 

In [29]:
# Train for STEP passes over train_loader, printing the mean loss of the
# batches seen since the previous report every 100 iterations.
for step in range(STEP):
    losses = []
    for i, (inputs, targets) in enumerate(train_loader):
        # Flatten each sample to a 784-element row to match the model's input size.
        # Tensors are passed directly: the Variable wrapper is a deprecated no-op.
        inputs = inputs.view(-1, 784)

        model.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        loss.backward()
        optimizer.step()

        # loss is a 0-dim tensor; `loss.data[0]` raises IndexError on current
        # PyTorch, so extract the Python float with .item() instead.
        losses.append(loss.item())
        if i % 100 == 0:
            # The denominator counts full batches only and relies on BATCH_SIZE
            # matching the batch_size the DataLoader was built with.
            print("[%d/%d] [%03d/%d] mean_loss : %.3f" % (step,STEP,i,len(train_dataset)//BATCH_SIZE,np.mean(losses)))
            losses = []

[0/3] [000/937] mean_loss : 2.303
[0/3] [100/937] mean_loss : 2.268
[0/3] [200/937] mean_loss : 2.180
[0/3] [300/937] mean_loss : 2.029
[0/3] [400/937] mean_loss : 1.785
[0/3] [500/937] mean_loss : 1.431
[0/3] [600/937] mean_loss : 1.110
[0/3] [700/937] mean_loss : 0.888
[0/3] [800/937] mean_loss : 0.757
[0/3] [900/937] mean_loss : 0.666
[1/3] [000/937] mean_loss : 0.504
[1/3] [100/937] mean_loss : 0.590
[1/3] [200/937] mean_loss : 0.523
[1/3] [300/937] mean_loss : 0.495
[1/3] [400/937] mean_loss : 0.476
[1/3] [500/937] mean_loss : 0.464
[1/3] [600/937] mean_loss : 0.445
[1/3] [700/937] mean_loss : 0.417
[1/3] [800/937] mean_loss : 0.396
[1/3] [900/937] mean_loss : 0.390
[2/3] [000/937] mean_loss : 0.495
[2/3] [100/937] mean_loss : 0.392
[2/3] [200/937] mean_loss : 0.383
[2/3] [300/937] mean_loss : 0.358
[2/3] [400/937] mean_loss : 0.372
[2/3] [500/937] mean_loss : 0.351
[2/3] [600/937] mean_loss : 0.346
[2/3] [700/937] mean_loss : 0.338
[2/3] [800/937] mean_loss : 0.341
[2/3] [900/937

트레이닝 방법론 미리 소개.. 