In [1]:
# 패키지 임포트

import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
from torchvision import datasets, transforms
from tqdm.notebook import tqdm

In [2]:
# mnist 데이터 다운로드

mnist_train = datasets.MNIST(root="./datasets", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root="./datasets", train=False, transform=transforms.ToTensor(), download=True)
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=100, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./datasets\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 7784764.67it/s] 


Extracting ./datasets\MNIST\raw\train-images-idx3-ubyte.gz to ./datasets\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./datasets\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 28883093.42it/s]


Extracting ./datasets\MNIST\raw\train-labels-idx1-ubyte.gz to ./datasets\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./datasets\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 2233771.04it/s]


Extracting ./datasets\MNIST\raw\t10k-images-idx3-ubyte.gz to ./datasets\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./datasets\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 4546665.58it/s]

Extracting ./datasets\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./datasets\MNIST\raw






In [3]:
# 네트워크 정의

input_size = 784 #입력 데이터 크기
hidden_sizes = [128, 64] #perceptrons 개수를 저장해놓은 배열인듯
output_size = 10 #출력 데이터 크키

#모델 쌓기
model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.LogSoftmax(dim=1))

In [4]:
#모델 형태 확인
model

Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=10, bias=True)
  (5): LogSoftmax(dim=1)
)

In [5]:
# 손실함수와 최적화

criterion = nn.CrossEntropyLoss() #cross entropy loss를 손실함수로 지정
optimizer = torch.optim.SGD(model.parameters(), lr=0.09) #최적화는 SGD(Stochastic Gradient Descent)로 지정하고 learning rate는 0.09

In [6]:
import time

In [7]:
# 훈련
start = time.time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        images = images.view(images.shape[0], -1)

        optimizer.zero_grad() #(https://algopoolja.tistory.com/55)
        #gradient는 update되는 변수이므로 epoch마다 reset

        #모델 계산
        output = model(images)

        #손실 계산
        loss = criterion(output, labels)

        #역전파
        loss.backward() #가중치 조절(https://www.datamaker.io/blog/posts/32)

        #최적화
        optimizer.step()

        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(train_loader)))
end = time.time()
print('실행시간 :',end - start)

Epoch 0 - Training loss: 0.6826581065232555
Epoch 1 - Training loss: 0.2594762390355269
Epoch 2 - Training loss: 0.19270467123016716
Epoch 3 - Training loss: 0.15151623180756965
Epoch 4 - Training loss: 0.12431899580173195
Epoch 5 - Training loss: 0.1045882419931392
Epoch 6 - Training loss: 0.09013252958655357
Epoch 7 - Training loss: 0.07880304511326054
Epoch 8 - Training loss: 0.06907466393740226
Epoch 9 - Training loss: 0.061103784195923556
Epoch 10 - Training loss: 0.05556831846324106
Epoch 11 - Training loss: 0.04996250182700654
Epoch 12 - Training loss: 0.044527123076065135
Epoch 13 - Training loss: 0.03915965288256606
Epoch 14 - Training loss: 0.03520057206352552
실행시간 : 111.16378259658813


In [8]:
model2 = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                      nn.ReLU(),
                      nn.Linear(hidden_sizes[1], output_size),
                      nn.LogSoftmax(dim=1))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.09) #최적화 방법 변화
#Adam : 현재 그래디언트와 이전 그래디언트의 지수 가중 평균을 이용

In [9]:
# 훈련
start = time.time()
epochs = 15
for e in range(epochs):
    running_loss = 0
    for images, labels in train_loader:
        images = images.view(images.shape[0], -1)

        optimizer.zero_grad() #(https://algopoolja.tistory.com/55)
        #gradient는 update되는 변수이므로 epoch마다 reset

        #모델 계산
        output = model2(images)

        #손실 계산
        loss = criterion(output, labels)

        #역전파
        loss.backward() #가중치 조절(https://www.datamaker.io/blog/posts/32)

        #최적화
        optimizer.step()

        running_loss += loss.item()
    else:
        print("Epoch {} - Training loss: {}".format(e, running_loss/len(train_loader)))
end = time.time()
print('실행시간 :',end - start)

Epoch 0 - Training loss: 1.3603959773977599
Epoch 1 - Training loss: 1.0735350477695464
Epoch 2 - Training loss: 1.0739300779501597
Epoch 3 - Training loss: 1.1280544258157412
Epoch 4 - Training loss: 1.2397646766901016
Epoch 5 - Training loss: 1.217793970008691
Epoch 6 - Training loss: 1.1819763277967772
Epoch 7 - Training loss: 1.2067379808425904
Epoch 8 - Training loss: 1.2005056012670199
Epoch 9 - Training loss: 1.4789105400443077
Epoch 10 - Training loss: 1.4939690430959065
Epoch 11 - Training loss: 1.4085396311680476
Epoch 12 - Training loss: 1.3942858470479647
Epoch 13 - Training loss: 1.4129943973819414
Epoch 14 - Training loss: 1.3799482793609301
실행시간 : 167.9534342288971


SGD가 가장 기본적이고 빠름

In [10]:
# 테스트
correct = 0
total = len(mnist_test)
with torch.no_grad():
    # Iterate through test set minibatchs
    for images, labels in tqdm(test_loader):
        # Forward pass
        #x = images.view(-1, 28*28)
        x = images.view(images.shape[0], -1)
        y = model(x)

        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions == labels).float())

print('Test accuracy: {}'.format(correct/total))


  0%|          | 0/100 [00:00<?, ?it/s]

Test accuracy: 0.9768000245094299
