#Adam Optimizer

In [49]:
# Imports
import torch
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [50]:
# GPU  (device = 'cuda')
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [51]:
# parameters
learning_rate = 1e-3
training_epochs = 15
batch_size = 100

In [52]:
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)
mnist_test = dsets.MNIST(root='MNIST_data/',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

In [53]:
# dataset load
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [54]:
# 입력 텐서의 크기가 28 * 28 = 784 이고, 출력 텐서의 크기가 10 이므로
linear = torch.nn.Linear(784, 10, bias=True).to(device)

In [55]:
# torch.nn.init.normal_(tensor, mean=0.0, std=1.0)
# N(std, mean) 을 따르는 정규분포에서의 값을 n 차원의 tensor 에 집어넣는다.
torch.nn.init.normal_(linear.weight)

Parameter containing:
tensor([[-1.2901,  0.3930, -0.1891,  ..., -0.1626, -0.3275,  0.5608],
        [-1.6757, -0.2454,  0.9221,  ...,  0.9559,  0.6160, -0.4226],
        [ 0.2444,  1.4810, -2.0906,  ...,  0.1206,  1.0044, -0.3737],
        ...,
        [ 0.6234,  1.8019, -2.7563,  ..., -0.5889, -0.5576,  0.7360],
        [-0.2871, -1.3313, -2.2248,  ...,  0.0309,  0.9180, -0.1482],
        [ 0.7678,  0.6624, -0.5362,  ...,  0.2338,  0.3688, -0.7182]],
       device='cuda:0', requires_grad=True)

In [56]:
# define cost/loss & optimizer
# use 'Adam' insead of 'SGD'
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(linear.parameters(), lr=learning_rate)

In [57]:
total_batch = len(data_loader)
for epoch in range(training_epochs+1):
  avg_cost = 0

  for X, Y in data_loader:
    # resahpe input image into [batch_size by 784] by using view function
    # label is not one-hot encoded
    X = X.view(-1, 28 * 28).to(device)
    Y = Y.to(device)

    # H(X) 계산
    hypothesis = linear(X)

    # Cost 계산
    cost = criterion(hypothesis, Y)

    # cost 로 H(X) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    avg_cost +=cost / total_batch             # 매 batch 마다 cost 를 더해서 한 epoch 당 평균 cost 를 구한다.

  print('Epoch {:6d}/{} Cost: {:.4f}'.format(
      epoch, training_epochs, avg_cost
  ))
print('Learning Finished')

Epoch      0/15 Cost: 5.6457
Epoch      1/15 Cost: 1.6968
Epoch      2/15 Cost: 1.1216
Epoch      3/15 Cost: 0.8828
Epoch      4/15 Cost: 0.7489
Epoch      5/15 Cost: 0.6619
Epoch      6/15 Cost: 0.5998
Epoch      7/15 Cost: 0.5537
Epoch      8/15 Cost: 0.5175
Epoch      9/15 Cost: 0.4888
Epoch     10/15 Cost: 0.4652
Epoch     11/15 Cost: 0.4454
Epoch     12/15 Cost: 0.4286
Epoch     13/15 Cost: 0.4136
Epoch     14/15 Cost: 0.4012
Epoch     15/15 Cost: 0.3898
Learning Finished


In [58]:
# Test 하기 위해 with torch.no_grad() 를 적용
with torch.no_grad():
  X_test = mnist_test.test_data.view(-1, 28 * 28).float().to(device)
  Y_test = mnist_test.test_labels.to(device)

  prediction = linear(X_test)
  correct_prediction = torch.argmax(prediction, 1) == Y_test
  accuracy = correct_prediction.float().mean()
  print('Accuracy: ',accuracy.item())

  # Get one and predict
  # 한 가지 무작위의 이미지를 뽑아서 예측해보자
  r = random.randint(0, len(mnist_test) - 1)
  X_single_data = mnist_test.test_data[r:r+1].view(-1, 28 * 28).float().to(device)
  Y_single_data = mnist_test.test_labels[r:r+1].to(device)

  print('Label ',Y_single_data.item())
  single_prediction = linear(X_single_data)
  print('Prediction: ',torch.argmax(single_prediction, 1).item())

Accuracy:  0.8880333304405212
Label  9
Prediction:  9
