In [3]:
import torch
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random
import torch.nn.init

torch.manual_seed(777)

# 파라미터
learning_rate = 0.001
training_epochs = 15
batch_size = 100
keep_prob = 0.7

# MNIST dataset
mnist_train = dsets.MNIST(root = "MNIST_data/", train = True, 
                         transform = transforms.ToTensor(), download = True)
mnist_test = dsets.MNIST(root = "MNIST_data/", train = False,
                        transform = transforms.ToTensor(), download = True)

# dataset loader
data_loader = torch.utils.data.DataLoader(dataset = mnist_train, 
                                          batch_size = batch_size, shuffle = True)

# 10.4 층을 겹겹이 앃는다.
linear1 = torch.nn.Linear(784, 512, bias = True)
linear2 = torch.nn.Linear(512, 512, bias = True)
linear3 = torch.nn.Linear(512, 512, bias = True)
linear4 = torch.nn.Linear(512, 512, bias = True)
linear5 = torch.nn.Linear(512, 10, bias = True)

# 10.1 Sigmoid -> ReLU 활성화 함수 사용
relu = torch.nn.ReLU()

# p는 PyTorch에서 dropout하는 비율
# 10.3 Dropout
dropout = torch.nn.Dropout(p = 1 - keep_prob)

# Xavier Initializer
# Xavier - 입력값과 출력값 사이의 난수를 선택해서 입력값의 제곱근으로 나눈다.
# 10.2 가중치 초기화
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)
torch.nn.init.xavier_uniform(linear4.weight)
torch.nn.init.xavier_uniform(linear5.weight)

# model
model = torch.nn.Sequential(linear1, relu, dropout,
                           linear2, relu, dropout,
                           linear3, relu, dropout,
                           linear4, relu, dropout,
                           linear5)

# cost / loss & optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# train my model
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train)
    
    for i, (batch_xs, batch_ys) in enumerate(data_loader):
        X = Variable(batch_xs.view(-1, 28 * 28))
        Y = Variable(batch_ys)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print("[Epoch : {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data))
    
print("Learning Finished!")

model.eval() # 모델의 평가 모드를 셋팅한다. (dropout = False)

X_test = Variable(mnist_test.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_test.test_labels)

prediction = model(X_test)
correct_prediction = (torch.max(prediction.data, 1)[1] == Y_test.data)
accuracy = correct_prediction.float().mean()
print("Accuracy :", accuracy)

r = random.randint(0, len(mnist_test) - 1)
X_single_data = Variable(mnist_test.test_data[r : r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r : r + 1])

print("Label :", Y_single_data.data)
single_prediction = model(X_single_data)
print("Prediction :", torch.max(single_prediction.data, 1)[1])



[Epoch :    1] cost = 0.00307307998
[Epoch :    2] cost = 0.00139737187
[Epoch :    3] cost = 0.00112180784
[Epoch :    4] cost = 0.000960307661
[Epoch :    5] cost = 0.000836153806
[Epoch :    6] cost = 0.000732520595
[Epoch :    7] cost = 0.000668318709
[Epoch :    8] cost = 0.000622942636
[Epoch :    9] cost = 0.000575020851
[Epoch :   10] cost = 0.00057142158
[Epoch :   11] cost = 0.000534050574
[Epoch :   12] cost = 0.00049240177
[Epoch :   13] cost = 0.000491272891
[Epoch :   14] cost = 0.000425122591
[Epoch :   15] cost = 0.000410493172
Learning Finished!
Accuracy : tensor(0.9818)
Label : tensor([5])
Prediction : tensor([5])


