mnist cnn

In [34]:
import torch
import torchvision

device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed(777)

In [35]:
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [36]:
mnist_train = torchvision.datasets.MNIST(root = 'MNIST_data', train = True, transform = torchvision.transforms.ToTensor(), download = True)
mnist_test = torchvision.datasets.MNIST(root = 'MNIST_data', train = False, transform = torchvision.transforms.ToTensor(), download = True)

In [37]:
data_loader = torch.utils.data.DataLoader(dataset = mnist_train, batch_size = batch_size, shuffle = True, drop_last = True)

In [38]:
class CNN(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2)
        )
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2)
        )
        self.fc = torch.nn.Linear(7*7*64, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc.weight)
    
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)

        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


In [39]:
model = CNN().to(device)

In [40]:
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [41]:
total_batch = len(data_loader)

for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in data_loader:
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch
    
    print("Epoch: {:2d} cost = {}".format(epoch, avg_cost))
print("Learning Finished")

Epoch:  0 cost = 0.22528058290481567
Epoch:  1 cost = 0.06271754205226898
Epoch:  2 cost = 0.046158112585544586
Epoch:  3 cost = 0.03745734319090843
Epoch:  4 cost = 0.03105783648788929
Epoch:  5 cost = 0.026011792942881584
Epoch:  6 cost = 0.021593118086457253
Epoch:  7 cost = 0.018191851675510406
Epoch:  8 cost = 0.01591910980641842
Epoch:  9 cost = 0.013598695397377014
Epoch: 10 cost = 0.010060638189315796
Epoch: 11 cost = 0.009766174480319023
Epoch: 12 cost = 0.008214268833398819
Epoch: 13 cost = 0.007499884348362684
Epoch: 14 cost = 0.0061996737495064735
Learning Finished


In [43]:
with torch.no_grad():
    X_test = mnist_test.data.view(-1, 1, 28, 28).float().to(device)
    Y_test = mnist_test.targets.to(device)

    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, dim = 1) == Y_test
    accuracy = correct_prediction.float().mean()

    print("accuracy :", accuracy.item())


accuracy : 0.9854999780654907


In [44]:
# 모델이 너무 깊은 경웽는 accuracy가 오히려 떨어질 수도 있음, 효율적인 구조롤 잘 쌓아야 함