In [1]:
import torch
from torch import nn, optim     # layer / optimizer 라이브러리
import torch.nn.functional as F
from torch.utils.data import DataLoader     # dataset 가져오기  /MNIST를 가져오는 곳
from torchvision import datasets, transforms    #데이터 전처리
import matplotlib.pyplot as plt    #시각화
from torch import random
import numpy as np
from torch.utils.data import random_split

In [2]:
# Device selection: run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [3]:
# Preprocessing pipeline applied to every image:
#   1. ToTensor converts the image to a float tensor (values in [0, 1] per the torchvision docs).
#   2. Normalize computes (x - mean) / std with mean = 0.5 and std = 0.5,
#      so the values fed to the model end up in [-1, 1], not [0, 1].
# mean/std are written as one-element tuples (one entry per channel); a bare
# `(0.5)` is just a parenthesised float, not a sequence.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [4]:
# Load MNIST twice with identical settings, differing only in the split:
#   train=True  -> training split
#   train=False -> test split
# download=True downloads the files into `root`; transform=transform applies
# the preprocessing pipeline defined above to every sample.
train_data, test_data = [
    datasets.MNIST(root='./', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
]

100%|██████████| 9.91M/9.91M [00:01<00:00, 6.06MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 160kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.30MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 6.15MB/s]


In [5]:
# Split the training set into training (80%) and validation (20%) subsets.
# A seeded generator makes the split identical on every run, so validation
# accuracy and the selected best model are reproducible.
# NOTE: this cell rebinds `train_data`; re-running it without re-running the
# dataset cell first would split the already-reduced subset again.
train_size = int(0.8 * len(train_data))
val_size = len(train_data) - train_size
train_data, val_data = random_split(
    train_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [6]:
# Data loaders: all three use the same batch size and two worker processes.
# Only the training loader shuffles its samples.
batch_size = 64

# Training loader (shuffled)
train_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Validation loader (fixed order)
val_loader = DataLoader(
    val_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Test loader (fixed order)
test_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [7]:
# Hyperparameters

# Model dimensions
input_size = 28 * 28   # number of input features after flattening a 28x28 image
hidden_size = 128      # width of each hidden layer
num_classes = 10       # MNIST digits 0-9 -> 10 classes

# Optimisation settings
num_epochs = 20        # number of passes over the training data
learning_rate = 1e-3   # optimizer step size

In [8]:
# 모델 정의
class MLP(nn.Module):
    """Multi-layer perceptron: three linear layers with ReLU and dropout.

    Data flow: flatten -> Linear -> ReLU -> Dropout -> Linear -> ReLU ->
    Dropout -> Linear. The final layer returns raw scores (no softmax).

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output scores per sample.
        dropout_rate: Probability passed to ``nn.Dropout`` (default 0.3).
    """

    def __init__(self, input_size, hidden_size, num_classes, dropout_rate=0.3):
        super(MLP, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_size)       # input -> hidden
        self.hidden_layer = nn.Linear(hidden_size, hidden_size)     # hidden -> hidden
        self.output_layer = nn.Linear(hidden_size, num_classes)     # hidden -> class scores
        self.activation = nn.ReLU()                                 # non-linear activation
        self.dropout = nn.Dropout(dropout_rate)                     # randomly zeroes activations while training

    def forward(self, x):
        """Return class scores of shape ``(N, num_classes)`` for input ``x``.

        ``x`` is flattened to ``(-1, input_size)`` first, so any input whose
        element count is a multiple of ``input_size`` is accepted.
        """
        # Use the size this instance was built with (stored on the first
        # linear layer) rather than a notebook-level global, so the model
        # also works when constructed with a different input size.
        # reshape (not view) additionally accepts non-contiguous tensors.
        x = x.reshape(-1, self.input_layer.in_features)
        x = self.input_layer(x)
        x = self.activation(x)
        x = self.dropout(x)

        x = self.hidden_layer(x)
        x = self.activation(x)
        x = self.dropout(x)

        x = self.output_layer(x)
        return x

In [9]:
# Build the network and move its parameters to the selected device in one step.
model = MLP(input_size, hidden_size, num_classes).to(device)
model  # last expression: display the layer summary

MLP(
  (input_layer): Linear(in_features=784, out_features=128, bias=True)
  (hidden_layer): Linear(in_features=128, out_features=128, bias=True)
  (output_layer): Linear(in_features=128, out_features=10, bias=True)
  (activation): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
)

In [10]:
# Optimizer and loss function
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)  # Adam updates all model parameters
criterion = nn.CrossEntropyLoss()                                    # cross-entropy loss for classification

In [11]:
# Training loop.
# Each epoch: (1) one optimisation pass over train_loader, (2) an accuracy
# measurement on val_loader, (3) a snapshot of the weights if this is the
# best validation accuracy seen so far.
best_val_acc = 0
best_model = None

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()                                  # enable dropout
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)          # loss between predictions and targets

        optimizer.zero_grad()                      # clear gradients from the previous step
        loss.backward()                            # compute gradients
        optimizer.step()                           # update the weights

        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)          # index of the highest score per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Training accuracy is measured on train-mode outputs (dropout active).
    train_acc = 100 * correct / total
    avg_train_loss = running_loss / len(train_loader)

    # ---- validation pass ----
    model.eval()                                   # disable dropout
    val_correct = 0
    val_total = 0

    with torch.no_grad():                          # no gradients needed for evaluation
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_acc = 100 * val_correct / val_total

    # ---- keep the best weights ----
    # state_dict() returns references to the live parameter tensors, which the
    # optimizer keeps updating in place. Clone every tensor so the snapshot
    # really holds this epoch's weights instead of tracking later epochs.
    # The `is None` check guarantees a snapshot exists after the first epoch
    # even if validation accuracy is 0.
    if best_model is None or val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {avg_train_loss:.4f}, "
          f"Train Acc: {train_acc:.2f}%, "
          f"Val Acc: {val_acc:.2f}%")

Epoch [1/20], Train Loss: 0.5617, Train Acc: 82.43%, Val Acc: 92.08%
Epoch [2/20], Train Loss: 0.3140, Train Acc: 90.34%, Val Acc: 94.33%
Epoch [3/20], Train Loss: 0.2665, Train Acc: 91.88%, Val Acc: 94.60%
Epoch [4/20], Train Loss: 0.2400, Train Acc: 92.64%, Val Acc: 95.22%
Epoch [5/20], Train Loss: 0.2206, Train Acc: 93.21%, Val Acc: 95.77%
Epoch [6/20], Train Loss: 0.2063, Train Acc: 93.65%, Val Acc: 95.92%
Epoch [7/20], Train Loss: 0.1981, Train Acc: 93.88%, Val Acc: 95.90%
Epoch [8/20], Train Loss: 0.1901, Train Acc: 94.16%, Val Acc: 96.21%
Epoch [9/20], Train Loss: 0.1826, Train Acc: 94.25%, Val Acc: 96.25%
Epoch [10/20], Train Loss: 0.1816, Train Acc: 94.41%, Val Acc: 96.02%
Epoch [11/20], Train Loss: 0.1734, Train Acc: 94.78%, Val Acc: 96.55%
Epoch [12/20], Train Loss: 0.1678, Train Acc: 94.80%, Val Acc: 96.40%
Epoch [13/20], Train Loss: 0.1637, Train Acc: 94.97%, Val Acc: 96.77%
Epoch [14/20], Train Loss: 0.1599, Train Acc: 95.05%, Val Acc: 96.40%
Epoch [15/20], Train Loss: 0.

In [12]:
# Load the weights stored in `best_model` by the training cell back into the model.
model.load_state_dict(state_dict=best_model)

<All keys matched successfully>

In [14]:
# Measure classification accuracy on the test set.
model.eval()  # disable dropout for inference
test_correct, test_total = 0, 0

with torch.no_grad():  # no gradients needed for evaluation
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # index of the highest score per sample
        test_correct += (predicted == labels).sum().item()
        test_total += labels.size(0)

test_acc = 100 * test_correct / test_total
print(f"Final Test Accuracy: {test_acc:.2f}%")

Final Test Accuracy: 96.99%
