In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import pdb

# Fix the random seeds (CPU generator and CUDA generator) so repeated runs produce the same results
torch.manual_seed(0)
torch.cuda.manual_seed(0)

In [2]:
# Preprocessing pipeline applied to every MNIST image when it is loaded
transform = transforms.Compose([
    transforms.ToTensor(), # convert the image to a PyTorch tensor
    transforms.Normalize((0.5,), (0.5,)) # (x - 0.5) / 0.5: rescales pixel values to the range -1 to 1
])

# Two-layer multilayer perceptron (MLP) classifier
class MLP(nn.Module):
    """Fully connected network: 784 inputs -> 64 hidden units (ReLU) -> 10 outputs.

    ``forward`` returns log-probabilities (``log_softmax`` over the class
    dimension), i.e. ``output.exp()`` sums to 1 along ``dim=1``.
    """

    def __init__(self):
        super().__init__()
        n_pixels = 28 * 28
        # Hidden layer: flattened image (784 values) -> 64 features
        self.fc1 = nn.Linear(n_pixels, 64)
        # Output layer: 64 features -> 10 class scores
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of inputs to per-class log-probabilities.

        Args:
            x: tensor whose elements can be viewed as rows of 784 values;
                the leading dimension is inferred by ``view(-1, 784)``.

        Returns:
            Tensor of shape ``(rows, 10)`` holding log-softmax scores.
        """
        # Flatten every sample into a 784-long row; -1 lets PyTorch infer the row count
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)
        # Rows are samples and columns are classes, so normalise across dim=1
        return F.log_softmax(scores, dim=1)

In [3]:
# Model training function
def train(model, device, train_loader, optimizer, criterion=None):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: the ``nn.Module`` to optimise; it is switched to training mode.
        device: device that every batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that already holds ``model``'s parameters.
        criterion: callable ``criterion(output, target) -> loss``. Defaults to
            ``F.cross_entropy`` (same computation as a default
            ``nn.CrossEntropyLoss()``), so the function no longer depends on
            a global ``criterion`` having been created by another cell.

    Returns:
        None. The model parameters are updated in place.
    """
    if criterion is None:
        criterion = F.cross_entropy

    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        # Gradients accumulate by default, so clear them before each step
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

# 모델 평가 함수
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # 배치 손실 합산
            pred = output.argmax(dim=1, keepdim=True)  # 가장 높은 값을 가진 인덱스를 가져옴
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.2f}%)')

In [None]:
if __name__ == '__main__':
    # Hyperparameter sweep: train a fresh MLP for every (batch size, learning
    # rate) pair and print the test metrics after each epoch.

    train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST('./data', train=False, transform=transform)

    n_epoch = 5
    bs = [64, 128, 256]        # training batch sizes to sweep
    lr = [0.1, 0.01, 0.001]    # Adam learning rates to sweep

    # Nothing below depends on the swept values, so build it once up front.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Kept as a module-level name: helper functions may look up `criterion` as a global.
    criterion = nn.CrossEntropyLoss()
    test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

    for batch_s in bs:  # iterate over the candidate batch sizes
        print(f'[batch_size : {batch_s}]')
        train_loader = DataLoader(train_dataset, batch_size=batch_s, shuffle=True)

        for learning_rate in lr:
            print(f'- Learning rate {learning_rate} ')
            # Move the model to its device BEFORE constructing the optimizer,
            # as the torch.optim documentation recommends.
            model = MLP().to(device)
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)
            for epoch in range(n_epoch):
                train(model, device, train_loader, optimizer)
                test(model, device, test_loader)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 94969850.57it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 85668807.51it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz



100%|██████████| 1648877/1648877 [00:00<00:00, 22887949.21it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7872119.33it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

[batch_size : 64]
- Learning rate 0.1 

Test set: Average loss: 0.0020, Accuracy: 1990/10000 (19.90%)

Test set: Average loss: 0.0023, Accuracy: 1009/10000 (10.09%)

Test set: Average loss: 0.0023, Accuracy: 974/10000 (9.74%)

Test set: Average loss: 0.0023, Accuracy: 1135/10000 (11.35%)

Test set: Average loss: 0.0023, Accuracy: 1135/10000 (11.35%)
- Learning rate 0.01 

Test set: Average loss: 0.0003, Accuracy: 9150/10000 (91.50%)

Test set: Average loss: 0.0003, Accuracy: 9048/10000 (90.48%)

Test set: Average loss: 0.0002, Accuracy: 9309/10000 (93.09%)

Test set: Average loss: 0.0002, Accuracy: 9296/10000 (92.96%)

Test set: Average loss: 0.0002, Accuracy: 9275/10000 (92.75%)
- Learning rate 0.001 

Test set: Average loss: 0.0003, Accuracy: 9248/10000 (92.48%)

Test set: Average loss: 0.0002, Accuracy: 9426/10000 (94.26%)

Test set: Average loss: 0.0002, Accuracy: 9463/10000 (94.63%)

Test set: Average loss: