In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.init


class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # 1 28 28 -> 32 14 14
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # 32 14 14 -> 64 7 7
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = torch.nn.Linear(7 * 7 * 64, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc.weight)  # 가중치 초기화 방식

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)  # flatten
        out = self.fc(out)
        return out


In [5]:
def main():

    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch.manual_seed(777)
    if device == "cuda":
        print("cuda avaliable")
        torch.cuda.manual_seed_all(777)

    # Hyper parameters
    batch_size = 256
    training_epochs = 15
    learning_rate = 0.001
    num_workers = 4  # 멀티 태스킹, 속도와 메모리 보고 적절한 값 선택 

    # dataset, drop_last: 딱 떨어지지 않는 batch는 안씀
    mnist_train = torchvision.datasets.MNIST(root="./data", train=True, transform=transforms.ToTensor(), download=True)
    mnist_test = torchvision.datasets.MNIST(root="./data", train=False, transform=transforms.ToTensor(), download=True)
    data_loader = torch.utils.data.DataLoader(
        dataset=mnist_train, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=num_workers
    )

    model = CNN().to(device)
    criterion = torch.nn.CrossEntropyLoss().to(device)  # softmax 포함되어 있음
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_batch = len(data_loader)
    print(f"총 배치의 수: {total_batch}")

    for epoch in range(training_epochs):
        avg_cost = 0

        for X, Y in data_loader:  # 미니 배치 단위로 꺼내온다, X = 미니배치, Y = label
            # image is already size of 28x28, no reshape
            # label is not one-hot encoded
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()
            hypothesis = model(X)
            cost = criterion(hypothesis, Y)
            cost.backward()
            optimizer.step()

            avg_cost += cost / total_batch

        with torch.no_grad():
            X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
            Y_test = mnist_test.test_labels.to(device)

            prediction = model(X_test)
            correct_prediction = torch.argmax(prediction, 1) == Y_test
            accuracy = correct_prediction.float().mean()

        print(f"[Epoch: {epoch+1:>4}] cost = {avg_cost:>.9} test_accuracy = {accuracy.item():>.9}")

if __name__ == "__main__":
    # 멀티태스킹 오류 안나려면 이렇게 실행해야 한다 
    main()


cuda avaliable
총 배치의 수: 234
[Epoch:    1] cost = 0.364110708 test_accuracy = 0.959499955
[Epoch:    2] cost = 0.0870382041 test_accuracy = 0.964299977
[Epoch:    3] cost = 0.0592021868 test_accuracy = 0.97299999
[Epoch:    4] cost = 0.049282752 test_accuracy = 0.976399958
[Epoch:    5] cost = 0.0418784656 test_accuracy = 0.977499962
[Epoch:    6] cost = 0.0370259583 test_accuracy = 0.98209995
[Epoch:    7] cost = 0.030858526 test_accuracy = 0.981399953
[Epoch:    8] cost = 0.0280581154 test_accuracy = 0.983399987
[Epoch:    9] cost = 0.024439102 test_accuracy = 0.98149997
[Epoch:   10] cost = 0.0229668822 test_accuracy = 0.986399949
[Epoch:   11] cost = 0.0207190942 test_accuracy = 0.986099958
[Epoch:   12] cost = 0.0181998257 test_accuracy = 0.984899998
[Epoch:   13] cost = 0.0158414915 test_accuracy = 0.986799955
[Epoch:   14] cost = 0.0135293743 test_accuracy = 0.985599995
[Epoch:   15] cost = 0.0124306241 test_accuracy = 0.987800002


In [3]:
def main():

    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch.manual_seed(777)
    if device == "cuda":
        print("cuda avaliable")
        torch.cuda.manual_seed_all(777)

    # Hyper parameters
    batch_size = 256
    training_epochs = 15
    learning_rate = 0.001
    # num_workers = 4  # cpu 사용량, 스레드의 절반 정도면 무난하다.

    # dataset, drop_last: 딱 떨어지지 않는 batch는 안씀
    mnist_train = torchvision.datasets.MNIST(root="./data", train=True, transform=transforms.ToTensor(), download=True)
    mnist_test = torchvision.datasets.MNIST(root="./data", train=False, transform=transforms.ToTensor(), download=True)
    data_loader = torch.utils.data.DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True, drop_last=True)

    model = CNN().to(device)
    criterion = torch.nn.CrossEntropyLoss().to(device)  # softmax 포함되어 있음
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    total_batch = len(data_loader)
    print(f"총 배치의 수: {total_batch}")

    for epoch in range(training_epochs):
        avg_cost = 0

        for X, Y in data_loader:  # 미니 배치 단위로 꺼내온다, X = 미니배치, Y = label
            # image is already size of 28x28, no reshape
            # label is not one-hot encoded
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()
            hypothesis = model(X)
            cost = criterion(hypothesis, Y)
            cost.backward()
            optimizer.step()

            avg_cost += cost / total_batch

        with torch.no_grad():
            X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
            Y_test = mnist_test.test_labels.to(device)

            prediction = model(X_test)
            correct_prediction = torch.argmax(prediction, 1) == Y_test
            accuracy = correct_prediction.float().mean()

        print(f"[Epoch: {epoch+1:>4}] cost = {avg_cost:>.9} test_accuracy = {accuracy.item():>.9}")

    # test


if __name__ == "__main__":
    main()


cuda avaliable
총 배치의 수: 234
[Epoch:    1] cost = 0.364147872 test_accuracy = 0.959499955
[Epoch:    2] cost = 0.0869780704 test_accuracy = 0.964100003
[Epoch:    3] cost = 0.0591695718 test_accuracy = 0.972799957
[Epoch:    4] cost = 0.049305588 test_accuracy = 0.976399958
[Epoch:    5] cost = 0.0418815799 test_accuracy = 0.976799965
[Epoch:    6] cost = 0.0370900594 test_accuracy = 0.981399953
[Epoch:    7] cost = 0.0308840629 test_accuracy = 0.98149997
[Epoch:    8] cost = 0.0280120112 test_accuracy = 0.983399987
[Epoch:    9] cost = 0.02446758 test_accuracy = 0.981999993
[Epoch:   10] cost = 0.0229756311 test_accuracy = 0.986399949
[Epoch:   11] cost = 0.0205761697 test_accuracy = 0.985899985
[Epoch:   12] cost = 0.0177921448 test_accuracy = 0.984499991
[Epoch:   13] cost = 0.0155437132 test_accuracy = 0.987099946
[Epoch:   14] cost = 0.0131805018 test_accuracy = 0.984799981
[Epoch:   15] cost = 0.0120643638 test_accuracy = 0.987499952


In [19]:
# Hyper parameters
batch_size = 64
training_epochs = 15
learning_rate = 0.001
# num_workers = 8  # cpu 사용량, 스레드의 절반 정도면 무난하다.

# dataset, drop_last: 딱 떨어지지 않는 batch는 안씀
mnist_train = torchvision.datasets.MNIST(root="./data", train=True, transform=transforms.ToTensor(), download=True)
mnist_test = torchvision.datasets.MNIST(root="./data", train=False, transform=transforms.ToTensor(), download=True)
data_loader = torch.utils.data.DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True, drop_last=True)

model = CNN().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)  # softmax 포함되어 있음
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
total_batch = len(data_loader)
print(f"총 배치의 수: {total_batch}")

for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in data_loader:  # 미니 배치 단위로 꺼내온다, X = 미니배치, Y = label
        # image is already size of 28x28, no reshape
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    # test
    with torch.no_grad():
        X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
        Y_test = mnist_test.test_labels.to(device)
        prediction = model(X_test)
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        accuracy = correct_prediction.float().mean()

    print(f"[Epoch: {epoch+1:>4}] cost = {avg_cost:>.9}, test Accuracy = {accuracy.item():>.9}")


총 배치의 수: 937
[Epoch:    1] cost = 0.184302002, test Accuracy = 0.964399993
[Epoch:    2] cost = 0.0524900779, test Accuracy = 0.961799979
[Epoch:    3] cost = 0.0387319624, test Accuracy = 0.964299977
[Epoch:    4] cost = 0.0308950339, test Accuracy = 0.97359997
[Epoch:    5] cost = 0.0249244235, test Accuracy = 0.974599957
[Epoch:    6] cost = 0.0204714406, test Accuracy = 0.970499992
[Epoch:    7] cost = 0.0168778282, test Accuracy = 0.979599953
[Epoch:    8] cost = 0.013719188, test Accuracy = 0.986799955
[Epoch:    9] cost = 0.0112591786, test Accuracy = 0.985099971
[Epoch:   10] cost = 0.00957069173, test Accuracy = 0.98179996
[Epoch:   11] cost = 0.00816438813, test Accuracy = 0.977299988
[Epoch:   12] cost = 0.00564985536, test Accuracy = 0.980799973
[Epoch:   13] cost = 0.00599969691, test Accuracy = 0.984999955
[Epoch:   14] cost = 0.00535799051, test Accuracy = 0.981899977
[Epoch:   15] cost = 0.0052357465, test Accuracy = 0.979499996
