In [1]:
# 모델을 다운로드 받고 모델이 제대로 작동하는지 확인하는 명령어입니다.
# 가상환경이 활성화된 터미널에서 아래 명령어를 실행하세요.
# python -m unittest tests/test_model.py
#-------------------------
# Ran 1 test in 8.179s
# OK
#-------------------------
# 위와 같은 결과를 얻었다면 모델이 정상적으로 작동하는 것입니다.
# 코드 중 담당하신 MODEL_NAME에 해당하는 주석(#)을 제거하고 실행해 주세요.

In [2]:
import os, torch                                            # [S1]
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from pathlib import Path

from core.models.model_factory import create_model
from core.data.dataset import EmotionDataset
# from core.training.trainer import train_model              # [S2] 필요 시 주석 해제
from core.training.trainer_v2 import train_model            # [S2] AMP/비동기/클리핑 지원


In [3]:
if __name__ == '__main__':
    # Training entry point: configures the device and global backend flags,
    # builds the train/val datasets and loaders, creates the selected model,
    # and hands everything to train_model(). Kept under the __main__ guard so
    # DataLoader worker processes that re-import this module do not re-run it.

    # Device
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    # Global performance flags (TF32 is recommended on Ampere or newer)  # [S3]
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True
    if hasattr(torch.backends.cudnn, "allow_tf32"):
        torch.backends.cudnn.allow_tf32 = True
    # Feature-detect instead of swallowing every exception: older PyTorch
    # builds simply lack this function, and any other failure should surface.
    if hasattr(torch, "set_float32_matmul_precision"):
        torch.set_float32_matmul_precision('high')  # PyTorch 2.0+

    # Data path / model selection (uncomment exactly one MODEL_NAME)
    DATA_DIR = Path("./datasets/korean_emotion_complex_vision_5_percent_verified_processed")
    # MODEL_NAME = 'resnet18'
    # MODEL_NAME = 'resnet50'
    # MODEL_NAME = 'mobilenet_v3_small'
    # MODEL_NAME = 'shufflenet_v2'
    MODEL_NAME = 'efficientnet_v2_s'
    # MODEL_NAME = 'squeezenet'

    NUM_CLASSES = 7  # placeholder; overwritten from the dataset's class list below
    BATCH_SIZE = 32
    LEARNING_RATE = 0.001

    # --- Epoch budget ------------------------------------------------  # [S4]
    # Deliberately a plain constant rather than an ipywidgets slider: a slider
    # created, displayed and read inside the same cell is sampled before any
    # user interaction can reach the kernel, so it can only ever return its
    # initial value. Edit NUM_EPOCHS_DEFAULT to change the epoch budget.
    NUM_EPOCHS_DEFAULT = 100
    NUM_EPOCHS = NUM_EPOCHS_DEFAULT

    EARLY_STOPPING_PATIENCE = 10

    # Transforms: augmentation for training, deterministic resize for validation.
    # Both normalise each channel with mean 0.5 / std 0.5.
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # Datasets
    train_dataset = EmotionDataset(data_dir=DATA_DIR / "train", transform=train_transform)
    val_dataset   = EmotionDataset(data_dir=DATA_DIR / "val",   transform=val_transform)

    # DataLoader I/O tuning                                             # [S5]
    NUM_WORKERS = min(4, os.cpu_count() or 2)
    # Pinned host memory only helps host-to-GPU copies; on a CPU-only run it
    # is useless and makes DataLoader emit a warning, so tie it to the device.
    PIN_MEMORY = DEVICE.type == "cuda"
    # The two options below are only valid when worker processes are used.
    PERSISTENT = NUM_WORKERS > 0
    PREFETCH = 2 if NUM_WORKERS > 0 else None

    def build_loader(ds, bs, shuffle):
        """Build a DataLoader over ``ds`` using the shared I/O settings above.

        Args:
            ds: dataset to iterate over.
            bs: batch size.
            shuffle: whether to shuffle each epoch. The same flag is reused
                for ``drop_last``, so only the shuffled (training) loader
                drops its final incomplete batch.

        Returns:
            The configured ``DataLoader``.
        """
        return DataLoader(
            ds, batch_size=bs, shuffle=shuffle,
            num_workers=NUM_WORKERS,
            pin_memory=PIN_MEMORY,
            persistent_workers=PERSISTENT,
            prefetch_factor=PREFETCH,
            drop_last=shuffle,  # True only for train -> steadier BatchNorm statistics (optional)
        )

    train_loader = build_loader(train_dataset, BATCH_SIZE, True)
    val_loader   = build_loader(val_dataset,   BATCH_SIZE, False)

    # Sync the class count with what the training dataset reports       # [S6]
    NUM_CLASSES = len(train_dataset.classes)
    print("데이터 준비 완료!")
    print(f"훈련 데이터셋 크기: {len(train_dataset)}")
    print(f"클래스 수: {NUM_CLASSES} -> {train_dataset.classes}")

    # Model / loss / optimizer
    model = create_model(model_name=MODEL_NAME, num_classes=NUM_CLASSES).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    print(f"'{MODEL_NAME}' 모델, 손실 함수, 옵티마이저 준비 완료!")

    # Run training (AMP / grad clipping / logging / checkpoint / TF32 + cudnn)  # [S7]
    # NOTE(review): the checkpoint path does not include MODEL_NAME, so runs
    # for different models write to the same file — confirm this is intended.
    os.makedirs("checkpoints", exist_ok=True)
    print("\n모델 훈련을 시작합니다...")
    trained_model = train_model(
        model, train_loader, val_loader,
        criterion, optimizer, DEVICE,
        num_epochs=NUM_EPOCHS,
        patience=EARLY_STOPPING_PATIENCE,
        use_amp=True, amp_in_val=True,
        grad_clip=1.0, log_interval=50,
        save_best_path="checkpoints/best.pt",
        set_tf32=True, cudnn_benchmark=True
    )

    # torch.save(trained_model.state_dict(), f'{MODEL_NAME}_trained.pth')
    # print("훈련된 모델 가중치가 저장되었습니다.")
# %%

Using device: cuda
데이터 준비 완료!
훈련 데이터셋 크기: 17975
클래스 수: 7 -> ['기쁨', '당황', '분노', '불안', '상처', '슬픔', '중립']
'efficientnet_v2_s' 모델, 손실 함수, 옵티마이저 준비 완료!

모델 훈련을 시작합니다...
Epoch 1/100
----------


  scaler = GradScaler(enabled=amp_enabled)                 # [A9]
  with autocast(enabled=amp_enabled):                         # [A14]


[E1 B50] loss=1.7420 acc=0.302
[E1 B100] loss=1.6210 acc=0.373
[E1 B150] loss=1.5597 acc=0.402
[E1 B200] loss=1.5029 acc=0.428
[E1 B250] loss=1.4543 acc=0.450
[E1 B300] loss=1.4199 acc=0.465
[E1 B350] loss=1.3894 acc=0.477
[E1 B400] loss=1.3692 acc=0.484
[E1 B450] loss=1.3480 acc=0.495
[E1 B500] loss=1.3320 acc=0.502
[E1 B550] loss=1.3180 acc=0.506
Train Loss: 1.3131 Acc: 0.5070


  with autocast(enabled=amp_enabled and amp_in_val):           # [A18]


Val Loss: 1.1514 Acc: 0.5849

  -> Val Loss 개선됨! (1.1514)
Epoch 2/100
----------
[E2 B50] loss=1.1257 acc=0.593
[E2 B100] loss=1.0960 acc=0.604
[E2 B150] loss=1.1016 acc=0.600
[E2 B200] loss=1.0997 acc=0.598
[E2 B250] loss=1.0937 acc=0.599
[E2 B300] loss=1.0924 acc=0.598
[E2 B350] loss=1.0912 acc=0.601
[E2 B400] loss=1.0884 acc=0.601
[E2 B450] loss=1.0845 acc=0.602
[E2 B500] loss=1.0772 acc=0.605
[E2 B550] loss=1.0755 acc=0.606
Train Loss: 1.0739 Acc: 0.6058
Val Loss: 1.0385 Acc: 0.6215

  -> Val Loss 개선됨! (1.0385)
Epoch 3/100
----------
[E3 B50] loss=0.9966 acc=0.637
[E3 B100] loss=0.9879 acc=0.638
[E3 B150] loss=0.9842 acc=0.638
[E3 B200] loss=0.9878 acc=0.634
[E3 B250] loss=0.9929 acc=0.634
[E3 B300] loss=0.9853 acc=0.636
[E3 B350] loss=0.9829 acc=0.637
[E3 B400] loss=0.9865 acc=0.637
[E3 B450] loss=0.9890 acc=0.635
[E3 B500] loss=0.9873 acc=0.636
[E3 B550] loss=0.9897 acc=0.637
Train Loss: 0.9895 Acc: 0.6353
Val Loss: 1.0136 Acc: 0.6330

  -> Val Loss 개선됨! (1.0136)
Epoch 4/100
----

In [4]:
# Show the CPU count reported by the OS; the training cell caps DataLoader
# workers at min(4, this value).
os.cpu_count()

22