In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchvision import datasets, transforms

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
#MNIST 데이터겟 불러오기
train_dataset = datasets.MNIST(root="./mnist_data", train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root="./mnist_data", train=False, transform=transforms.ToTensor())

batch_size = 64

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 94315768.66it/s]


Extracting ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 79121942.41it/s]

Extracting ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 20064207.04it/s]


Extracting ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 23548243.22it/s]


Extracting ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw



In [4]:
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./mnist_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [5]:
test_dataset

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./mnist_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [6]:
class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.l1 = nn.Linear(784, 480)
    self.l2 = nn.Linear(480, 200)
    self.l3 = nn.Linear(200, 80)
    self.l4 = nn.Linear(80, 10)

  def forward(self, x):
    x = x.view(-1, 784)
    x = F.relu(self.l1(x))
    x = F.relu(self.l2(x))
    x = F.relu(self.l3(x))
    return self.l4(x)

In [7]:
model = Net().to(device)
print(model)

Net(
  (l1): Linear(in_features=784, out_features=480, bias=True)
  (l2): Linear(in_features=480, out_features=200, bias=True)
  (l3): Linear(in_features=200, out_features=80, bias=True)
  (l4): Linear(in_features=80, out_features=10, bias=True)
)


In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.2)

def train(epoch):
  model.train()
  for batch_idx, (data, target) in enumerate(train_loader):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    output = model(data)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step() # 모델 가중치 업데이트
    if batch_idx % 100 == 0:
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.8f}'.format(
            epoch, batch_idx * len(data), len(train_loader.dataset),
            100 * batch_idx / len(train_loader), loss.data))


In [9]:
def test():
    model.eval()
    loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss += criterion(output, target).data.item()
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        loss, correct, len(test_loader.dataset),
        100 * correct / len(test_loader.dataset)))


In [None]:
for epoch in range(1, 11):
  train(epoch)
  test()


Test set: Average loss: 0.0197, Accuracy: 7084/10000 (71%)


Test set: Average loss: 0.0066, Accuracy: 8785/10000 (88%)


Test set: Average loss: 0.0054, Accuracy: 8978/10000 (90%)


Test set: Average loss: 0.0046, Accuracy: 9144/10000 (91%)


Test set: Average loss: 0.0041, Accuracy: 9237/10000 (92%)


Test set: Average loss: 0.0036, Accuracy: 9328/10000 (93%)


Test set: Average loss: 0.0031, Accuracy: 9422/10000 (94%)



In [None]:
import numpy as np
import matplotlib.pyplot as plt

image_data = test_dataset[0][0].to(device)
image_label = test_dataset[0][1]
print('숫자 이미지 X의 크기:', image_data.size())
print('숫자 이미지 X의 레이블:', image_label)
print(model(image_data))
plt.imshow(image_data.cpu().numpy()[0], cmap='gray')