In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [None]:
# ========================================
# 2. Environment and data preparation
# ========================================
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ToTensor is the only preprocessing step applied to each sample.
transform = transforms.ToTensor()

# Both splits share the same root directory, download flag and transform;
# only the `train` flag differs between them.
mnist_kwargs = dict(root='../data', download=True, transform=transform)
train_data = datasets.MNIST(train=True, **mnist_kwargs)
test_data = datasets.MNIST(train=False, **mnist_kwargs)

In [None]:
# Inspect the shape of the image tensor stored on the training dataset
# (the recorded output is [60000, 28, 28]).
train_data.data.shape

torch.Size([60000, 28, 28])

In [None]:
# Both loaders use the same mini-batch size; only the training loader shuffles.
BATCH_SIZE = 64

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

# Number of mini-batches per split, shown as the cell output.
len(train_loader), len(test_loader)

(938, 157)

In [None]:
# The loader exposes the dataset it was built from, so this reads the same
# stored image tensor as `train_data.data` (recorded output: [60000, 28, 28]).
train_loader.dataset.data.shape

torch.Size([60000, 28, 28])

In [None]:
# Reference only: the same network written as an nn.Module subclass.
# class MNIST_Linear(nn.Module):
#     def __init__(self):
#         super().__init__()

#         self.classifier = nn.Sequential(
#             nn.Flatten(),       # (B, 1, 28, 28) → (B, 784)
#             nn.Linear(784, 128),
#             nn.ReLU(),
#             nn.Linear(128, 10)
#         )

#     def forward(self, x):
#         x = self.classifier(x)
#         return x

# Two-layer fully connected classifier, assembled from an explicit layer list.
# The layers are created in the same order as they are applied.
hidden_units = 128
layers = [
    nn.Flatten(),                      # (B, 1, 28, 28) → (B, 784); ToTensor adds the channel dim
    nn.Linear(28 * 28, hidden_units),  # 784 input pixels → hidden representation
    nn.ReLU(),
    nn.Linear(hidden_units, 10),       # hidden representation → 10 output scores
]
model = nn.Sequential(*layers)

# Move the parameters to the selected device; as the last expression, the
# returned module is also displayed as the cell output.
model.to(device)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=128, bias=True)
  (2): ReLU()
  (3): Linear(in_features=128, out_features=10, bias=True)
)

In [None]:
# ========================================
# 3. Define the model, loss function and optimizer
# ========================================
# Alternative when using the class-based definition instead of nn.Sequential:
# model = MNIST_Linear().to(device)

# Adam over every parameter of `model`, with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [None]:
# ========================================
# 4. Training loop
# ========================================
# Trains `model` for 5 epochs and prints the mean training loss per epoch.
#
# The epoch loss is a sample-weighted mean: each batch loss is multiplied by
# the number of samples in that batch before accumulation. Averaging the
# per-batch means directly would give the smaller final batch the same weight
# as a full batch and bias the reported value.
for epoch in range(1, 6):  # 5 epochs
    model.train()
    loss_sum = 0.0   # sum of per-sample losses seen so far in this epoch
    num_seen = 0     # number of samples seen so far in this epoch

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `criterion` is nn.CrossEntropyLoss() with its default mean reduction,
        # so scaling by the batch size recovers the per-batch loss sum.
        n_batch = labels.size(0)
        loss_sum += loss.item() * n_batch
        num_seen += n_batch

    # Guard against an empty loader so the report never divides by zero.
    total_loss = loss_sum / max(num_seen, 1)

    print(f"Epoch {epoch}, Loss: {total_loss:.4f}")

Epoch 1, Loss: 0.3421
Epoch 2, Loss: 0.1575
Epoch 3, Loss: 0.1104
Epoch 4, Loss: 0.0829
Epoch 5, Loss: 0.0653


In [None]:
# ========================================
# 5. Test evaluation
# ========================================
# Switch to evaluation mode and count correct predictions over the test set.
model.eval()
correct, total = 0, 0

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        # Predicted class = index of the largest output score in each row.
        predicted = outputs.argmax(dim=1)

        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

print(f"Test Accuracy: {100 * correct / total:.2f}%")

Test Accuracy: 97.56%
