In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [2]:
# 입력 정규화 추가 (평균=0.5, 표준편차=0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = dsets.FashionMNIST(root='FashionMNIST_data/',
                                    train=True,
                                    transform=transform,
                                    download=True)
test_dataset = dsets.FashionMNIST(root='FashionMNIST_data/',
                                   train=False,
                                   transform=transform,
                                   download=True)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

100%|██████████| 26.4M/26.4M [00:02<00:00, 11.3MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 203kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.73MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 9.75MB/s]


In [3]:
class CNN_Normalize(nn.Module):
    def __init__(self):
        super(CNN_Normalize, self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(1,32,3,1,1), nn.ReLU(), nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(nn.Conv2d(32,64,3,1,1), nn.ReLU(), nn.MaxPool2d(2))
        self.layer3 = nn.Sequential(nn.Conv2d(64,128,3,1,1), nn.ReLU(), nn.MaxPool2d(2))
        self.fc1 = nn.Linear(3*3*128, 625)
        self.fc2 = nn.Linear(625, 10)
        torch.nn.init.xavier_uniform_(self.fc1.weight)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

model = CNN_Normalize().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [4]:
# 학습
for epoch in range(20):
    avg_cost = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output = model(x)
        cost = criterion(output, y)
        cost.backward()
        optimizer.step()
        avg_cost += cost / len(train_loader)
    print(f"[Epoch {epoch+1}] Cost: {avg_cost:.4f}")

[Epoch 1] Cost: 0.4020
[Epoch 2] Cost: 0.2714
[Epoch 3] Cost: 0.2335
[Epoch 4] Cost: 0.2054
[Epoch 5] Cost: 0.1844
[Epoch 6] Cost: 0.1656
[Epoch 7] Cost: 0.1456
[Epoch 8] Cost: 0.1336
[Epoch 9] Cost: 0.1233
[Epoch 10] Cost: 0.1097
[Epoch 11] Cost: 0.1026
[Epoch 12] Cost: 0.0914
[Epoch 13] Cost: 0.0848
[Epoch 14] Cost: 0.0778
[Epoch 15] Cost: 0.0758
[Epoch 16] Cost: 0.0689
[Epoch 17] Cost: 0.0640
[Epoch 18] Cost: 0.0641
[Epoch 19] Cost: 0.0595
[Epoch 20] Cost: 0.0583


In [5]:
# 테스트
with torch.no_grad():
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)
    correct = 0
    total = 0
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        output = model(x)
        pred = output.argmax(dim=1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    print(f"Normalize 적용 모델 정확도: {100 * correct / total:.2f}%")

✅ Normalize 적용 모델 정확도: 90.39%


모델 정확도 90.39%

In [6]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

# 디바이스 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)
print(f"Device: {device}")

# 데이터셋 로드 (Normalize 없이 ToTensor만 사용)
transform = transforms.ToTensor()

train_dataset = dsets.FashionMNIST(root='FashionMNIST_data/',
                                    train=True,
                                    transform=transform,
                                    download=True)
test_dataset = dsets.FashionMNIST(root='FashionMNIST_data/',
                                   train=False,
                                   transform=transform,
                                   download=True)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

Device: cuda


In [7]:
# He 초기화를 적용한 CNN 모델 정의
class CNN_HeInit(nn.Module):
    def __init__(self):
        super(CNN_HeInit, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc1 = nn.Linear(3*3*128, 625)
        self.fc2 = nn.Linear(625, 10)

        # He 초기화 적용 (ReLU에 적합)
        nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='relu')
        nn.init.kaiming_uniform_(self.fc2.weight, nonlinearity='relu')

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

In [8]:
# 모델 정의 및 학습 설정
model = CNN_HeInit().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [9]:
# 학습과 테스트 정확도
for epoch in range(1, 21):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()

    train_loss = running_loss / len(train_loader)
    train_accuracy = 100 * correct / total

    # 테스트 정확도
    model.eval()
    test_correct = 0
    test_total = 0
    with torch.no_grad():
        for x_test, y_test in test_loader:
            x_test, y_test = x_test.to(device), y_test.to(device)
            test_outputs = model(x_test)
            _, predicted = torch.max(test_outputs.data, 1)
            test_total += y_test.size(0)
            test_correct += (predicted == y_test).sum().item()

    test_accuracy = 100 * test_correct / test_total

    print(f"Epoch [{epoch}/20] - Loss: {train_loss:.4f} | Train Acc: {train_accuracy:.2f}% | Test Acc: {test_accuracy:.2f}%")

Epoch [1/20] - Loss: 0.4161 | Train Acc: 84.64% | Test Acc: 88.22%
Epoch [2/20] - Loss: 0.2746 | Train Acc: 90.02% | Test Acc: 90.02%
Epoch [3/20] - Loss: 0.2357 | Train Acc: 91.42% | Test Acc: 90.28%
Epoch [4/20] - Loss: 0.2127 | Train Acc: 92.28% | Test Acc: 90.25%
Epoch [5/20] - Loss: 0.1924 | Train Acc: 93.02% | Test Acc: 91.24%
Epoch [6/20] - Loss: 0.1751 | Train Acc: 93.51% | Test Acc: 91.21%
Epoch [7/20] - Loss: 0.1578 | Train Acc: 94.20% | Test Acc: 90.76%
Epoch [8/20] - Loss: 0.1441 | Train Acc: 94.62% | Test Acc: 91.13%
Epoch [9/20] - Loss: 0.1316 | Train Acc: 95.18% | Test Acc: 90.48%
Epoch [10/20] - Loss: 0.1218 | Train Acc: 95.38% | Test Acc: 91.00%
Epoch [11/20] - Loss: 0.1095 | Train Acc: 95.92% | Test Acc: 90.85%
Epoch [12/20] - Loss: 0.1024 | Train Acc: 96.16% | Test Acc: 91.16%
Epoch [13/20] - Loss: 0.0935 | Train Acc: 96.52% | Test Acc: 90.72%
Epoch [14/20] - Loss: 0.0881 | Train Acc: 96.79% | Test Acc: 90.73%
Epoch [15/20] - Loss: 0.0823 | Train Acc: 97.08% | Test A

In [10]:
print(f"최종 결과 - Loss: {train_loss:.4f} | Train Accuracy: {train_accuracy:.2f}% | Test Accuracy: {test_accuracy:.2f}%")

✅ 최종 결과 - Loss: 0.0635 | Train Accuracy: 97.62% | Test Accuracy: 90.24%


모델 정확도 90.24%
최종 결과 - Loss: 0.0635 | Train Accuracy: 97.62% | Test Accuracy: 90.24%