사전구성

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os

In [2]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),  #CIFAR 32x32 -> 224x224
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4865, 0.4409),  # CIFAR-100 평균
                         (0.2673, 0.2564, 0.2761))  # CIFAR-100 표준편차
])

In [3]:
trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False, num_workers=2)

print("클래스 개수:", len(trainset.classes))    # 100
print("예시 클래스:", trainset.classes[:10])    # 앞 10개 클래스
print(trainset[0][0].shape)     #이미지 크기

클래스 개수: 100
예시 클래스: ['apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle']
torch.Size([3, 224, 224])


모듈 구성

In [4]:
class myVGG13(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.layer1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        
        self.maxpool1 = nn.MaxPool2d(2)

        self.layer2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.layer2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.maxpool2 = nn.MaxPool2d(2)

        self.layer3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.layer3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.maxpool3 = nn.MaxPool2d(2)

        self.layer4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.layer4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.maxpool4 = nn.MaxPool2d(2)

        self.layer5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.layer5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.maxpool5 = nn.MaxPool2d(2)

        self.flatten = nn.Flatten()

        self.layer6 = nn.Linear(7*7*512, 4096)
        self.dropout6 = nn.Dropout(p=0.5)
        self.layer7 = nn.Linear(4096, 4096)
        self.dropout7 = nn.Dropout(p=0.5)
        self.layer8 = nn.Linear(4096, 100)  #CIFAR은 100 class

    def forward(self, x):
        x = F.relu(self.layer1_1(x))
        x = F.relu(self.layer1_2(x))

        x = self.maxpool1(x)

        x = F.relu(self.layer2_1(x))
        x = F.relu(self.layer2_2(x))

        x = self.maxpool2(x)

        x = F.relu(self.layer3_1(x))
        x = F.relu(self.layer3_2(x))

        x = self.maxpool3(x)

        x = F.relu(self.layer4_1(x))
        x = F.relu(self.layer4_2(x))

        x = self.maxpool4(x)

        x = F.relu(self.layer5_1(x))
        x = F.relu(self.layer5_2(x))

        x = self.maxpool5(x)
        x = self.flatten(x)

        x = F.relu(self.layer6(x))
        x = self.dropout6(x)

        x = F.relu(self.layer7(x))
        x = self.dropout7(x)

        return self.layer8(x)

In [5]:
train_loss_history = []
train_acc_history = []
val_loss_history = []
val_acc_history = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = myVGG13().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=5e-2)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5)
scaler = torch.amp.GradScaler(device.type)
epochs = 5

학습 및 평가

In [6]:
from tqdm.auto import tqdm
import time
#tqdm 테스트
for i in tqdm(range(50), desc="smoke test", dynamic_ncols=True):
    time.sleep(0.01)

smoke test:   0%|          | 0/50 [00:00<?, ?it/s]

In [None]:


@torch.no_grad()
def evaluate(model, loader, device):
    model.eval()
    total, correct, loss_sum = 0, 0, 0.0
    criterion = nn.CrossEntropyLoss()

    # 평가도 진행률 보이게
    pbar = tqdm(loader, desc="Val", leave=False)
    for x, y in pbar:
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
        logits = model(x)
        loss = criterion(logits, y)
        pred = logits.argmax(1)
        total += y.size(0)
        correct += (pred == y).sum().item()
        loss_sum += loss.item() * y.size(0)

        cur_loss = loss_sum / total
        cur_acc  = correct / total * 100
        pbar.set_postfix(loss=f"{cur_loss:.4f}", acc=f"{cur_acc:.2f}%")

    return loss_sum / total, correct / total

# ============================================================
# 학습 루프 (진행률/ETA/속도/메모리 사용량 표시)
# ============================================================

best_acc = 0.0
os.makedirs('checkpoints', exist_ok=True)

for epoch in range(1, epochs+1):
    model.train()
    total, correct, run_loss = 0, 0, 0.0

    start = time.time()
    # tqdm 진행바: 전체 배치 수 기준
    pbar = tqdm(trainloader, desc=f"Train {epoch:02d}/{epochs}", leave=False)

    for i, (x, y) in enumerate(pbar, 1):
        x, y = x.to(device, non_blocking=True), y.to(device, non_blocking=True)
        optimizer.zero_grad(set_to_none=True)

        with torch.amp.autocast(device_type=device.type, enabled=(device.type=='cuda')):
            logits = model(x)
            loss = criterion(logits, y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # 통계 갱신
        run_loss += loss.item() * y.size(0)
        pred = logits.argmax(1)
        total += y.size(0)
        correct += (pred == y).sum().item()

        # 현재 스텝 지표
        train_loss = run_loss / total
        train_acc  = correct / total * 100.0
        cur_lr     = scheduler.get_last_lr()[0] if hasattr(scheduler, "get_last_lr") else optimizer.param_groups[0]["lr"]

        # 속도/메모리(선택)
        elapsed = time.time() - start
        ips = total / max(elapsed, 1e-9)  # images/sec
        if device.type == 'cuda':
            mem_mb = torch.cuda.memory_allocated() / (1024**2)
            mem_txt = f"{mem_mb:.0f}MB"
        else:
            mem_txt = "CPU"

        # 진행바 우측에 지표 출력
        pbar.set_postfix({
            "loss": f"{train_loss:.4f}",
            "acc": f"{train_acc:.2f}%",
            "lr": f"{cur_lr:.5f}",
            "ips": f"{ips:.0f}/s",
            "mem": mem_txt
        })

    # 스케줄러 스텝 (에폭 끝)
    scheduler.step()

    # 에폭 요약 출력
    val_loss, val_acc = evaluate(model, testloader, device)
    print(f"[{epoch:02d}/{epochs}] "
          f"train_loss={train_loss:.4f} train_acc={train_acc:.2f}% | "
          f"val_loss={val_loss:.4f} val_acc={val_acc*100:.2f}% | "
          f"lr={scheduler.get_last_lr()[0]:.5f}")

    train_loss_history.append(train_loss)
    train_acc_history.append(train_acc/100.0)
    val_loss_history.append(val_loss)
    val_acc_history.append(val_acc)

    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'val_acc': val_acc,
        }, "checkpoints/best_vgg13_cifar100.pt")

print(f"Best val_acc: {best_acc*100:.2f}%")

Train 01/5:   0%|          | 0/196 [00:09<?, ?it/s]

Val:   0%|          | 0/40 [00:10<?, ?it/s]

[01/5] train_loss=4.2874 train_acc=4.00% | val_loss=3.8881 val_acc=8.41% | lr=0.00090


Train 02/5:   0%|          | 0/196 [00:11<?, ?it/s]

Val:   0%|          | 0/40 [00:10<?, ?it/s]

[02/5] train_loss=3.7428 train_acc=11.31% | val_loss=3.4667 val_acc=16.67% | lr=0.00065


Train 03/5:   0%|          | 0/196 [00:11<?, ?it/s]

Val:   0%|          | 0/40 [00:10<?, ?it/s]

[03/5] train_loss=3.3807 train_acc=18.04% | val_loss=3.2226 val_acc=21.96% | lr=0.00035


Train 04/5:   0%|          | 0/196 [00:11<?, ?it/s]

그래프

In [1]:
import matplotlib.pyplot as plt

epochs_range = range(1, epochs+1)

# Loss 그래프
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_loss_history, label="Train Loss")
plt.plot(epochs_range, val_loss_history, label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss Curve")
plt.legend()
plt.grid(True)

# Accuracy 그래프
plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_acc_history, label="Train Acc")
plt.plot(epochs_range, val_acc_history, label="Val Acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy Curve")
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

NameError: name 'epochs' is not defined