In [1]:
import torch
import torch.nn as nn
import torch.optim as optim 
import torch.nn.functional as F
from torchvision import datasets,transforms
from torch.utils import data

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Hyperparameters and runtime configuration.
USE_CUDA = torch.cuda.is_available()  # whether PyTorch reports a usable CUDA device
DEVICE = torch.device("cuda") if USE_CUDA else torch.device("cpu")
EPOCHS = 10      # upper bound of the epoch loop
BATCH_SIZE = 64  # mini-batch size handed to the training DataLoader

In [11]:
# MNIST data pipeline.
MNIST_ROOT = '../PaperWithCode/'  # directory the dataset is downloaded to / read from
TEST_BATCH_SIZE = 16              # mini-batch size used only for evaluation

# Both splits share one preprocessing pipeline: convert the image to a tensor,
# then normalize the single channel with mean 0.1307 and std 0.3081
# (presumably the MNIST training-set statistics -- TODO confirm).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: reshuffled every epoch so mini-batches differ between epochs.
train_loader = data.DataLoader(
    dataset=datasets.MNIST(
        root=MNIST_ROOT,
        download=True,
        train=True,
        transform=mnist_transform,
    ),
    shuffle=True,
    batch_size=BATCH_SIZE,
)

# Test split: evaluation sums loss/accuracy over all batches, so ordering is
# irrelevant. Shuffling is turned off to avoid drawing from the global RNG
# and to keep the evaluation order deterministic.
test_loader = data.DataLoader(
    dataset=datasets.MNIST(
        root=MNIST_ROOT,
        download=True,
        train=False,
        transform=mnist_transform,
    ),
    shuffle=False,
    batch_size=TEST_BATCH_SIZE,
)

In [12]:
class NET(nn.Module):
    """Small convolutional classifier: two conv stages plus a two-layer MLP head.

    The fully connected head is sized for 320 flattened features, which is
    what the two conv/pool stages produce for a 1x28x28 input
    (28 -> 24 -> 12 -> 8 -> 4 spatially, 20 channels: 20 * 4 * 4 = 320).

    ``forward`` returns raw class scores (logits) of shape ``(N, 10)``; no
    softmax is applied.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 10 channels, 5x5 convolution, ReLU, 2x2 max-pool.
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Stage 2: 10 -> 20 channels, with channel-wise dropout (default p)
        # applied before the 2x2 max-pool.
        self.layer2 = nn.Sequential(
            nn.Conv2d(10, 20, kernel_size=5),
            nn.ReLU(),
            nn.Dropout2d(),
            nn.MaxPool2d(kernel_size=2),
        )

        # Classifier head: 320 -> 50 -> 10 with dropout (p=0.2) in between.
        self.layer3 = nn.Sequential(
            nn.Linear(320, 50),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(50, 10),
        )

    def forward(self, x):
        """Map a batch of images to class logits.

        Args:
            x: tensor of shape ``(N, 1, H, W)``; ``H = W = 28`` is required
               for the flattened feature size to match the head (320).

        Returns:
            Tensor of shape ``(N, 10)`` with unnormalized class scores.

        Raises:
            RuntimeError: if the flattened per-sample feature size is not 320
                (raised by the first ``nn.Linear``).
        """
        x = self.layer1(x)
        x = self.layer2(x)
        # Flatten everything except the batch dimension. Unlike view(-1, 320),
        # this never moves elements between samples: a wrongly sized input
        # fails in the Linear layer instead of silently changing the batch size.
        x = x.flatten(start_dim=1)
        x = self.layer3(x)

        return x


In [13]:
# Instantiate the network, move it to the selected device, and create an
# Adam optimizer over its parameters with learning rate 1e-3.
model = NET()
model = model.to(DEVICE)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [14]:
def train(model,train_loader,optimizer):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Args:
        model: network to optimize; switched to training mode.
        train_loader: iterable yielding ``(inputs, target)`` mini-batches,
            where ``target`` holds one class index per sample.
        optimizer: optimizer already bound to ``model``'s parameters.

    Returns:
        Mean cross-entropy loss per sample over the pass, as a Python float
        (0.0 if the loader yields no samples). Callers that ignore the
        return value are unaffected.
    """
    model.train()  # training mode, e.g. enables dropout layers
    total_loss = 0.0
    seen = 0
    for inputs, target in train_loader:
        # With this notebook's MNIST loader: inputs [64, 1, 28, 28], target [64].
        inputs, target = inputs.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad()
        output = model(inputs)  # logits, one row per sample
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so a smaller final
        # batch is averaged correctly. detach() keeps the running total out
        # of the autograd graph.
        batch_size = target.size(0)
        total_loss += loss.detach() * batch_size
        seen += batch_size

    return float(total_loss) / seen if seen else 0.0
        

In [15]:
def evaluate(model, test_loader):
    """Compute the mean cross-entropy loss and accuracy of ``model``.

    Both metrics are averaged over the samples actually yielded by
    ``test_loader``, so they stay correct when the loader skips samples
    (e.g. ``drop_last=True`` or a custom sampler) and the loader only needs
    to be an iterable of ``(inputs, target)`` batches.

    Args:
        model: network to evaluate; switched to evaluation mode.
        test_loader: iterable yielding ``(inputs, target)`` mini-batches,
            where ``target`` holds one class index per sample.

    Returns:
        ``(test_loss, test_accuracy)``: mean per-sample loss and accuracy
        in percent.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    """
    model.eval()  # evaluation mode, e.g. disables dropout layers
    test_loss = 0
    correct = 0
    seen = 0
    with torch.no_grad():
        for inputs, target in test_loader:
            inputs, target = inputs.to(DEVICE), target.to(DEVICE)
            output = model(inputs)

            # Accumulate the summed (not averaged) batch loss so the final
            # division yields an exact per-sample mean.
            test_loss += F.cross_entropy(output, target,
                                         reduction='sum').item()

            # The predicted class is the index of the largest logit along
            # the class axis (dim 1).
            pred = output.argmax(dim=1)
            # Count predictions that match the labels.
            correct += (pred == target).sum().item()
            seen += target.size(0)

    test_loss /= seen
    test_accuracy = 100. * correct / seen
    return test_loss, test_accuracy

In [16]:
# Alternate one training pass with one evaluation pass for EPOCHS epochs,
# printing the test metrics after each epoch (epochs are numbered from 1).
for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)

    summary = '[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
        epoch, test_loss, test_accuracy)
    print(summary)

[1] Test Loss: 0.0856, Accuracy: 97.39%
[2] Test Loss: 0.0612, Accuracy: 98.01%
[3] Test Loss: 0.0447, Accuracy: 98.64%
[4] Test Loss: 0.0455, Accuracy: 98.57%
[5] Test Loss: 0.0422, Accuracy: 98.74%
[6] Test Loss: 0.0382, Accuracy: 98.77%
[7] Test Loss: 0.0342, Accuracy: 98.94%
[8] Test Loss: 0.0334, Accuracy: 98.90%
[9] Test Loss: 0.0325, Accuracy: 98.95%
[10] Test Loss: 0.0307, Accuracy: 99.06%
