In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, random_split

# 디바이스 설정 (GPU 사용가능 시 GPU 사용)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 0. MNIST 데이터셋 로드 및 70% 학습 / 30% 테스트 분할
transform = transforms.ToTensor()
dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 1. MLP 모델 정의: 2개의 은닉층, 각 은닉층 20개 노드
class MLP(nn.Module):
    def __init__(self, activation='relu', use_bn=False):
        super(MLP, self).__init__()
        self.use_bn = use_bn
        # 활성화 함수 설정: 'relu' 혹은 'sigmoid'
        if activation.lower() == 'relu':
            self.act = nn.ReLU()
        elif activation.lower() == 'sigmoid':
            self.act = nn.Sigmoid()
        else:
            raise ValueError("지원하지 않는 활성화 함수입니다.")

        # 입력층 (28x28=784) -> 은닉층1 (20)
        self.fc1 = nn.Linear(28*28, 20)
        if use_bn:
            self.bn1 = nn.BatchNorm1d(20)
        # 은닉층1 (20) -> 은닉층2 (20)
        self.fc2 = nn.Linear(20, 20)
        if use_bn:
            self.bn2 = nn.BatchNorm1d(20)
        # 은닉층2 (20) -> 출력층 (10)
        self.fc3 = nn.Linear(20, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # [batch_size, 784]로 펼치기
        x = self.fc1(x)
        if self.use_bn:
            x = self.bn1(x)
        x = self.act(x)
        x = self.fc2(x)
        if self.use_bn:
            x = self.bn2(x)
        x = self.act(x)
        x = self.fc3(x)
        return x

# 4. 가중치 초기화 함수: 카이밍, 제이비어, 정규분포 초기화
def initialize_weights(model, init_type='kaiming'):
    for m in model.modules():
        if isinstance(m, nn.Linear):
            if init_type.lower() == 'kaiming':
                nn.init.kaiming_normal_(m.weight)
            elif init_type.lower() == 'xavier':
                nn.init.xavier_normal_(m.weight)
            elif init_type.lower() == 'normal':
                nn.init.normal_(m.weight, mean=0.0, std=0.01)
            else:
                raise ValueError("지원하지 않는 초기화 방법입니다.")
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)

# 학습 및 평가 함수 (예제에서는 3 에포크 진행)
def train_and_evaluate(model, train_loader, test_loader, epochs=3):
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # 학습 루프
    model.train()
    for epoch in range(epochs):
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # 평가 루프
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

# 실험 설정: 활성화 함수, Batch Normalization 적용 여부, 가중치 초기화 방법
activations = ['relu', 'sigmoid']
batch_norm_options = [False, True]
init_types = ['kaiming', 'xavier', 'normal']

results = {}

# 각 설정에 대해 모델 생성, 초기화, 학습, 평가 진행
for act in activations:
    for bn in batch_norm_options:
        for init in init_types:
            config = f"Activation: {act}, BatchNorm: {bn}, Init: {init}"
            print(f"실험 진행: {config}")
            model = MLP(activation=act, use_bn=bn)
            initialize_weights(model, init_type=init)
            acc = train_and_evaluate(model, train_loader, test_loader, epochs=3)
            results[config] = acc
            print(f"테스트 정확도: {acc:.2f}%")
            print("-" * 50)

print("전체 실험 결과:")
for config, acc in results.items():
    print(f"{config} --> {acc:.2f}%")


100%|██████████| 9.91M/9.91M [00:00<00:00, 41.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.21MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 10.3MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 7.83MB/s]


실험 진행: Activation: relu, BatchNorm: False, Init: kaiming
테스트 정확도: 93.39%
--------------------------------------------------
실험 진행: Activation: relu, BatchNorm: False, Init: xavier
테스트 정확도: 94.14%
--------------------------------------------------
실험 진행: Activation: relu, BatchNorm: False, Init: normal
테스트 정확도: 89.52%
--------------------------------------------------
실험 진행: Activation: relu, BatchNorm: True, Init: kaiming
테스트 정확도: 94.92%
--------------------------------------------------
실험 진행: Activation: relu, BatchNorm: True, Init: xavier
테스트 정확도: 95.01%
--------------------------------------------------
실험 진행: Activation: relu, BatchNorm: True, Init: normal
테스트 정확도: 95.14%
--------------------------------------------------
실험 진행: Activation: sigmoid, BatchNorm: False, Init: kaiming
테스트 정확도: 91.58%
--------------------------------------------------
실험 진행: Activation: sigmoid, BatchNorm: False, Init: xavier
테스트 정확도: 90.76%
--------------------------------------------------
실험 진행: Act