In [None]:
import torch
from torch import nn, optim
import torchvision
from torchvision import datasets, transforms

import tqdm

from torch.nn import ModuleList

In [None]:
# Hyperparameters
learning_rate = 1e-3
batch_size = 64
seed = 42         # fixed seed so a top-to-bottom run of the notebook is repeatable
val_size = 10000  # number of training examples held out for validation

# Seed the global RNG once, before anything random happens. DataLoader shuffling,
# weight initialisation and dropout all draw from this generator.
torch.manual_seed(seed)

# Data
train_data_mnist = datasets.MNIST('D#datasets', train=True, download=True, transform=transforms.ToTensor())
test_data_mnist = datasets.MNIST('D#datasets', train=False, download=True, transform=transforms.ToTensor())

print(len(train_data_mnist))

# The train size is derived from the dataset length rather than hard-coded, and the
# split uses its own seeded generator so the train/validation partition is the same
# on every run regardless of how much global randomness was consumed beforehand.
train_set, val_set = torch.utils.data.random_split(
    train_data_mnist,
    [len(train_data_mnist) - val_size, val_size],
    generator=torch.Generator().manual_seed(seed),
)

# Only the training loader shuffles; evaluation order does not affect accuracy.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
dev_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data_mnist, batch_size=batch_size)

100%|██████████| 9.91M/9.91M [00:00<00:00, 45.0MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.77MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.7MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.74MB/s]


60000


In [None]:
# Model Init

class CNN(torch.nn.Module):
    """Small convolutional classifier producing 10 raw class scores (logits).

    Architecture: two conv -> ReLU -> max-pool stages, then three fully
    connected layers, with dropout after the first two. The first fully
    connected layer is hard-wired to ``4 * 4 * 16`` input features, which is
    what the conv stack yields for single-channel 28x28 images
    (28 -> 26 -> 13 -> 9 -> 4 per spatial side).

    Args:
        keep_prob: Probability of *keeping* a unit in the dropout layers; the
            dropout rate is ``1 - keep_prob``. Defaults to 0.5.
    """

    def __init__(self, keep_prob=0.5):
        super().__init__()
        self.keep_prob = keep_prob

        n_channels_1 = 6
        n_channels_2 = 16

        # Stage 1: 3x3 conv (no padding) followed by 2x2 max-pooling.
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, n_channels_1, kernel_size=3, stride=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Stage 2: 5x5 conv (no padding) followed by 2x2 max-pooling.
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(n_channels_1, n_channels_2, kernel_size=5, stride=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Each Linear layer is kept as its own attribute so that its weight can
        # be re-initialised with Xavier-uniform before it is wrapped in a
        # Sequential together with its activation and dropout.
        self.fc3 = torch.nn.Linear(4 * 4 * n_channels_2, 120, bias=True)
        torch.nn.init.xavier_uniform_(self.fc3.weight)
        self.layer3 = torch.nn.Sequential(
            self.fc3,
            torch.nn.ReLU(),
            torch.nn.Dropout(p=1 - self.keep_prob)
        )

        self.fc4 = torch.nn.Linear(120, 80, bias=True)
        torch.nn.init.xavier_uniform_(self.fc4.weight)
        self.layer4 = torch.nn.Sequential(
            self.fc4,
            torch.nn.ReLU(),
            torch.nn.Dropout(p=1 - self.keep_prob)
        )

        # Output layer: no activation here, the network returns raw scores.
        self.fc5 = torch.nn.Linear(80, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc5.weight)

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of class scores for image batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)  # flatten per sample for the FC layers

        out = self.layer3(out)
        out = self.layer4(out)
        out = self.fc5(out)
        return out

In [None]:
# Test Function

def test(data_loader, model):
    model.eval()
    n_predict = 0
    n_correct = 0
    with torch.no_grad():
        for X, Y in tqdm.tqdm(data_loader):
            y_hat = model(X)
            _, predicted = torch.max(y_hat, 1)

            n_predict += len(predicted)
            n_correct += (Y == predicted).sum()

    accuracy = n_correct / n_predict
    print(f"Accuracy: {accuracy} ()")


In [None]:
# Initialize

model = CNN()
# CrossEntropyLoss applies the softmax itself, so the model feeds it raw scores.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Training

training_epochs = 5

for epoch in range(training_epochs):
    model.train()  # test() below leaves the model in eval mode, so re-enable dropout
    loss_sum = 0.0
    n_samples = 0

    for X, Y in tqdm.tqdm(train_loader):  # one mini-batch per iteration
        # X: (batch_size, 1, 28, 28) - a single input channel
        optimizer.zero_grad()
        y_hat = model(X)
        loss = criterion(y_hat, Y)
        loss.backward()
        optimizer.step()

        # criterion returns the mean over the batch. Weight it by the batch
        # size so the shorter final batch is not over-represented in the
        # epoch average.
        batch_len = X.size(0)
        loss_sum += loss.item() * batch_len
        n_samples += batch_len

    cost = loss_sum / n_samples  # mean loss per training example this epoch
    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, cost))
    print("Dev")
    test(dev_loader, model)


100%|██████████| 782/782 [00:21<00:00, 35.85it/s]


[Epoch:    1] cost = 0.619917656
Dev


100%|██████████| 157/157 [00:02<00:00, 77.96it/s]


Accuracy: 0.9541000127792358 ()


100%|██████████| 782/782 [00:17<00:00, 44.44it/s]


[Epoch:    2] cost = 0.206069446
Dev


100%|██████████| 157/157 [00:02<00:00, 58.61it/s]


Accuracy: 0.9666000008583069 ()


100%|██████████| 782/782 [00:17<00:00, 44.88it/s]


[Epoch:    3] cost = 0.15150107
Dev


100%|██████████| 157/157 [00:02<00:00, 75.18it/s]


Accuracy: 0.973800003528595 ()


100%|██████████| 782/782 [00:18<00:00, 41.35it/s]


[Epoch:    4] cost = 0.128635042
Dev


100%|██████████| 157/157 [00:02<00:00, 76.05it/s]


Accuracy: 0.9781000018119812 ()


100%|██████████| 782/782 [00:18<00:00, 41.89it/s]


[Epoch:    5] cost = 0.110910387
Dev


100%|██████████| 157/157 [00:02<00:00, 67.28it/s]

Accuracy: 0.9789000153541565 ()





In [None]:
# Final evaluation on the held-out MNIST test split

test(data_loader=test_loader, model=model)

100%|██████████| 157/157 [00:02<00:00, 56.57it/s]

Accuracy: 0.9850000143051147 ()



