In [1]:
# 1️⃣ 라이브러리 불러오기
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# 2️⃣ 데이터셋 준비
# Mini-batch size shared by the training and test loaders.
BATCH_SIZE = 64

# Use the GPU when CUDA reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert each image to a tensor, then apply (x - 0.5) / 0.5 on its single
# channel, so ToTensor's [0, 1] output ends up in [-1, 1].
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Both splits are stored under the same root and share the preprocessing.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# 3️⃣ 학습/평가 함수 정의
def train(model, loader, criterion, optimizer, epochs=5):
    """Train ``model`` on ``loader`` and print the mean batch loss per epoch.

    Args:
        model: Module to optimize; put into training mode once, before the
            first epoch.
        loader: Iterable yielding ``(images, labels)`` batches. It is iterated
            once per epoch and does not need to support ``len()``.
        criterion: Callable mapping ``(output, labels)`` to a scalar loss
            tensor.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            around every backward pass.
        epochs: Number of passes over ``loader``.

    Returns:
        None. One line per epoch is printed with the mean of the per-batch
        losses, or ``nan`` when the loader produced no batches.

    Each batch is moved to the module-level ``device`` before the forward
    pass.
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        # Divide by the batches actually seen instead of len(loader): this
        # avoids ZeroDivisionError on an empty loader and works for loaders
        # that do not define __len__.
        mean_loss = total_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}')

def evaluate(model, loader):
    """Print the top-1 accuracy of ``model`` over every batch in ``loader``.

    Args:
        model: Module to evaluate. It is switched to evaluation mode and is
            left in that mode when the function returns.
        loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        None. A single ``Accuracy: ...`` line is printed; the value is
        ``nan`` when the loader yielded no samples.

    Each batch is moved to the module-level ``device``; gradients are
    disabled for the whole pass.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            # Predicted class = index of the largest score along dim 1.
            pred = output.argmax(dim=1)
            correct += (pred == labels).sum().item()
            total += labels.size(0)
    # Guard the division so an empty loader reports nan instead of raising
    # ZeroDivisionError.
    accuracy = correct / total if total else float('nan')
    print(f'Accuracy: {accuracy:.4f}')

# 4️⃣ 원래 모델 정의 (ReLU 포함)
class MNISTModel(nn.Module):
    """Three-layer fully connected classifier with ReLU activations.

    The network maps 28*28 = 784 input features through hidden layers of
    512 and 256 units to 10 outputs, and finishes with ``LogSoftmax`` so
    ``forward`` returns log-probabilities.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 10),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions hold 784 values per sample.

        Returns:
            Tensor of shape ``(batch, 10)``.
        """
        # flatten() falls back to a copy when the input is not contiguous
        # and also handles an empty batch, both of which make
        # view(x.size(0), -1) raise.
        x = x.flatten(start_dim=1)
        return self.model(x)

# Build the baseline network and place its parameters on the chosen device.
model = MNISTModel()
model = model.to(device)

# Negative log-likelihood loss, optimized with Adam.
criterion = nn.NLLLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

print("==== 원래 모델 학습 ====")
train(
    model=model,
    loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=5,
)

print("==== 원래 모델 평가 ====")
evaluate(model=model, loader=test_loader)

# 5️⃣ ReLU 제거 모델 정의
class MNISTModelNoReLU(nn.Module):
    """Ablation network: three stacked ``Linear`` layers with no activation.

    The layer sizes are 784 -> 512 -> 256 -> 10, followed by ``LogSoftmax``.
    With no non-linearity between them, the stacked linear layers compose
    into a single affine map of the input.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(28*28, 512),
            # No ReLU here: the activation is deliberately left out.
            nn.Linear(512, 256),
            # No ReLU here: the activation is deliberately left out.
            nn.Linear(256, 10),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions hold 784 values per sample.

        Returns:
            Tensor of shape ``(batch, 10)``.
        """
        # flatten() falls back to a copy when the input is not contiguous
        # and also handles an empty batch, both of which make
        # view(x.size(0), -1) raise.
        x = x.flatten(start_dim=1)
        return self.model(x)

# Build the activation-free network and give it its own Adam optimizer;
# the loss function defined earlier in the script is reused.
model_no_relu = MNISTModelNoReLU()
model_no_relu = model_no_relu.to(device)
optimizer_no_relu = optim.Adam(params=model_no_relu.parameters(), lr=1e-3)

print("\n==== ReLU 제거 모델 학습 ====")
train(
    model=model_no_relu,
    loader=train_loader,
    criterion=criterion,
    optimizer=optimizer_no_relu,
    epochs=5,
)

print("==== ReLU 제거 모델 평가 ====")
evaluate(model=model_no_relu, loader=test_loader)

100%|██████████| 9.91M/9.91M [00:00<00:00, 53.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.75MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.5MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.90MB/s]


==== 원래 모델 학습 ====
Epoch 1/5, Loss: 0.2986
Epoch 2/5, Loss: 0.1343
Epoch 3/5, Loss: 0.1006
Epoch 4/5, Loss: 0.0829
Epoch 5/5, Loss: 0.0706
==== 원래 모델 평가 ====
Accuracy: 0.9776

==== ReLU 제거 모델 학습 ====
Epoch 1/5, Loss: 0.4471
Epoch 2/5, Loss: 0.3753
Epoch 3/5, Loss: 0.3480
Epoch 4/5, Loss: 0.3384
Epoch 5/5, Loss: 0.3291
==== ReLU 제거 모델 평가 ====
Accuracy: 0.9123
