## 1. 초기 설정

In [1]:
import torch
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random
import torch.nn.init

torch.manual_seed(777)

training_epochs = 15
batch_size = 100
keep_prob = 0.7

## 2. training set과 Variable 만들기

In [4]:
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)
mnist_test = dsets.MNIST(root='MNIST_data/',
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True)

## 3. model 만들기

In [5]:
linear1 = torch.nn.Linear(784, 512, bias=True)
linear2 = torch.nn.Linear(512, 512, bias=True)
linear3 = torch.nn.Linear(512, 512, bias=True)
linear4 = torch.nn.Linear(512, 512, bias=True)
linear5 = torch.nn.Linear(512, 10, bias=True)
relu = torch.nn.ReLU()

# dropout메서드의 p는 layer 사이 사이의 연결을 끊어버리는 확률을 말한다.
dropout = torch.nn.Dropout(p=1-keep_prob)

torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)
torch.nn.init.xavier_uniform(linear4.weight)
torch.nn.init.xavier_uniform(linear5.weight)

# activation function 다음에 dropout을 넣어야 한다.
model = torch.nn.Sequential(linear1, relu, dropout,
                            linear2, relu, dropout,
                            linear3, relu, dropout,
                            linear4, relu, dropout,
                            linear5)

## 4. cost function과 optimizer 만들기

In [6]:
cost_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

## 5. model 훈련시키기

In [8]:
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train) // batch_size

    for i, (batch_xs, batch_ys) in enumerate(data_loader):
        X = Variable(batch_xs.view(-1, 28 * 28))
        Y = Variable(batch_ys) 

        optimizer.zero_grad()
        Y_hat = model(X)
        cost = cost_func(Y_hat, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data[0]))

print('Learning Finished!')

[Epoch:    1] cost = 0.308780164
[Epoch:    2] cost = 0.145706803
[Epoch:    3] cost = 0.112665333
[Epoch:    4] cost = 0.0920754746
[Epoch:    5] cost = 0.0855652764
[Epoch:    6] cost = 0.0740884691
[Epoch:    7] cost = 0.0631967261
[Epoch:    8] cost = 0.0621559173
[Epoch:    9] cost = 0.0583355762
[Epoch:   10] cost = 0.0541223735
[Epoch:   11] cost = 0.0532222018
[Epoch:   12] cost = 0.0481303334
[Epoch:   13] cost = 0.0508971773
[Epoch:   14] cost = 0.0456200764
[Epoch:   15] cost = 0.0437856503
Learning Finished!


## 6. model 확인하기

In [9]:
# dropout으로 훈련을 시킨 후에 model의 모든 연결이 필요로 하다면 eval이라는 메서드를 사용해야 한다.
model.eval()

X_test = Variable(mnist_test.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_test.test_labels)

prediction = model(X_test)
correct_prediction = (torch.max(prediction.data, 1)[1] == Y_test.data)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy)

# Get one and predict
r = random.randint(0, len(mnist_test) - 1)
X_single_data = Variable(mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r:r + 1])

print("Label: ", Y_single_data.data)
single_prediction = model(X_single_data)
print("Prediction: ", torch.max(single_prediction.data, 1)[1])

Accuracy: 0.9787
Label:  
 6
[torch.LongTensor of size 1]

Prediction:  
 6
[torch.LongTensor of size 1]

