In [2]:
# 모델을 다운로드 받고 모델이 제대로 작동하는지 확인하는 명령어입니다.
# 가상환경이 활성화된 터미널에서 아래 명령어를 실행하세요.
# python -m unittest tests/test_model.py
#-------------------------
# Ran 1 test in 8.179s
# OK
#-------------------------
# 위와 같은 결과를 얻었다면 모델이 정상적으로 작동하는 것입니다.
# 코드 중 담당하신 MODEL_NAME에 해당하는 주석(#)을 제거하고 실행해 주세요.

In [3]:
import os, torch                                            # [S1]
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from pathlib import Path

from core.models.model_factory import create_model
from core.data.dataset import EmotionDataset
# from core.training.trainer import train_model              # [S2] 필요 시 주석 해제
from core.training.trainer_v3 import train_model            # [S2] AMP/비동기/클리핑 지원
import random
import numpy as np
import torch
import random, numpy as np, torch
from datetime import datetime
from pathlib import Path
def seed_worker(worker_id:int):
    """Seed Python's and NumPy's RNGs inside a DataLoader worker process.

    Intended to be passed as ``worker_init_fn`` to a ``DataLoader``. The seed
    is taken from ``torch.initial_seed()`` and reduced to 32 bits, because
    ``np.random.seed`` only accepts values below 2**32.

    Args:
        worker_id: Index of the worker; required by the callback signature
            but not used here.
    """
    worker_seed = torch.initial_seed() % (1 << 32)
    random.seed(worker_seed)
    np.random.seed(worker_seed)

In [4]:
def set_global_seed(seed: int):
    """Seed every RNG this notebook relies on with the same value.

    Seeds, in order: Python's ``random``, NumPy's global RNG, PyTorch's CPU
    generator, and all CUDA device generators.

    cuDNN determinism flags are deliberately not forced here so that training
    speed is unaffected; runs are therefore seeded but not guaranteed to be
    bit-for-bit reproducible on GPU.

    Args:
        seed: Seed value applied to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

In [5]:
# Base random seed for the whole notebook: passed to set_global_seed() and used
# to seed the torch.Generator objects handed to the DataLoaders.
SEED = 42


In [6]:
import torch.multiprocessing as mp

# "spawn" multiprocessing context. It is created here but the DataLoader's
# multiprocessing_context argument is currently commented out in the training cell.
ctx = mp.get_context("spawn")

# Generator seeded with the notebook-wide SEED (the training cell builds its own
# generator under the same name before creating the loaders).
g = torch.Generator()
g.manual_seed(SEED)

In [8]:
if __name__ == '__main__':
    # Training entry point: seeds RNGs, builds datasets/loaders, creates the model,
    # loss and optimizer, then runs train_model() and stores the best checkpoint
    # under a timestamped run directory.

    # ---- Reproducibility -------------------------------------------------
    set_global_seed(SEED)

    # ---- Device ----------------------------------------------------------
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    # ---- Global performance flags (Ampere or newer recommended) ----------  # [S3]
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.matmul.allow_tf32 = True
    if hasattr(torch.backends.cudnn, "allow_tf32"):
        torch.backends.cudnn.allow_tf32 = True
    # Feature-detect the PyTorch 2.0+ API instead of swallowing every exception.
    if hasattr(torch, "set_float32_matmul_precision"):
        torch.set_float32_matmul_precision('high')

    # ---- Data path / model selection -------------------------------------
    DATA_DIR = Path("./datasets/korean_emotion_complex_vision_10_percent_SI")
    # Uncomment exactly one MODEL_NAME (the one assigned to you).
    MODEL_NAME = 'resnet18'
    # MODEL_NAME = 'resnet50'
    # MODEL_NAME = 'mobilenet_v3_small'
    # MODEL_NAME = 'shufflenet_v2'
    # MODEL_NAME = 'efficientnet_v2_s'
    # MODEL_NAME = 'squeezenet'

    # ---- Hyper-parameters ------------------------------------------------
    NUM_CLASSES = 7          # provisional; re-synchronised with the dataset below
    BATCH_SIZE = 128
    # These are the values the optimizer actually uses. Previously LEARNING_RATE
    # was declared as 0.001 but ignored in favour of a hard-coded 1e-4.
    LEARNING_RATE = 1e-4
    WEIGHT_DECAY = 1e-4
    LABEL_SMOOTH = 0.05
    EARLY_STOPPING_PATIENCE = 6

    # The epoch count is a plain constant. The former ipywidgets slider was read
    # in the same cell execution right after being displayed, so it could never
    # yield anything but its default value and made the run depend on UI state.  # [S4]
    NUM_EPOCHS_DEFAULT = 100
    NUM_EPOCHS = NUM_EPOCHS_DEFAULT

    # ---- Run directory / checkpoint path ---------------------------------
    RUN_DIR = Path(f"./runs/{MODEL_NAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
    RUN_DIR.mkdir(parents=True, exist_ok=True)
    BEST_CKPT_PATH = RUN_DIR / "best.ckpt"

    # ---- Transforms (ImageNet normalisation statistics) ------------------
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std  = [0.229, 0.224, 0.225]

    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])

    # ---- Datasets --------------------------------------------------------
    train_dataset = EmotionDataset(data_dir=DATA_DIR / "train", transform=train_transform)
    val_dataset   = EmotionDataset(data_dir=DATA_DIR / "val",   transform=val_transform)

    # ---- DataLoader I/O tuning -------------------------------------------  # [S5]
    NUM_WORKERS = min(4, os.cpu_count() or 2)
    # Pinned host memory only helps host-to-GPU copies; skip it on CPU-only runs.
    PIN_MEMORY = DEVICE.type == "cuda"
    PERSISTENT = NUM_WORKERS > 0
    PREFETCH = 2 if NUM_WORKERS > 0 else None
    print('NUM_WORKERS: ',NUM_WORKERS)

    # Seeded generator shared by both loaders (drives the shuffling order).
    g = torch.Generator()
    g.manual_seed(SEED)

    def build_loader(ds, bs, shuffle):
        """Create a DataLoader for `ds` with the notebook-wide I/O settings.

        Args:
            ds: Dataset to wrap.
            bs: Batch size.
            shuffle: Whether to shuffle; also used as `drop_last`, so only the
                training loader drops its final incomplete batch.

        Returns:
            The configured DataLoader.
        """
        loader_kwargs = dict(
            batch_size=bs,
            shuffle=shuffle,
            num_workers=NUM_WORKERS,
            pin_memory=PIN_MEMORY,
            persistent_workers=PERSISTENT,
            drop_last=shuffle,
            generator=g,
        )
        # prefetch_factor is only meaningful with worker processes; leave it at
        # the library default when loading in the main process.
        if NUM_WORKERS > 0:
            loader_kwargs["prefetch_factor"] = PREFETCH
        # worker_init_fn=seed_worker and multiprocessing_context=ctx were left
        # disabled by the author; add them to loader_kwargs to re-enable.
        return DataLoader(ds, **loader_kwargs)

    train_loader = build_loader(train_dataset, BATCH_SIZE, True)
    val_loader   = build_loader(val_dataset,   BATCH_SIZE, False)

    # ---- Synchronise class count with the dataset ------------------------  # [S6]
    NUM_CLASSES = len(train_dataset.classes)
    print("데이터 준비 완료!")
    print(f"훈련 데이터셋 크기: {len(train_dataset)}")
    print(f"클래스 수: {NUM_CLASSES} -> {train_dataset.classes}")

    # ---- Model / loss / optimizer ----------------------------------------
    model = create_model(model_name=MODEL_NAME, num_classes=NUM_CLASSES).to(DEVICE)

    # Progressive-unfreeze sketch (currently disabled):
    # UNFREEZE_AT = 6          # start training the backbone from epoch 6
    # LR_HEAD = 1e-3
    # LR_BACKBONE = 1e-4
    # # 1) Train the head only: freeze the backbone
    # for name, p in model.named_parameters():
    #     if not any(k in name for k in ['fc', 'classifier']):  # resnet head is fc; mobilenet/shufflenet use classifier
    #         p.requires_grad = False
    # # 2) Give only the head parameters to the optimizer
    # head_params = [p for p in model.parameters() if p.requires_grad]
    # optimizer = optim.AdamW([{'params': head_params, 'lr': LR_HEAD, 'weight_decay': WEIGHT_DECAY}])

    # Cross-entropy with label smoothing.
    criterion = nn.CrossEntropyLoss(label_smoothing=LABEL_SMOOTH)
    optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

    print(f"'{MODEL_NAME}' 모델, 손실 함수, 옵티마이저 준비 완료!")

    # ---- Training ---------------------------------------------------------  # [S7]
    # NOTE(review): the best checkpoint is written to BEST_CKPT_PATH inside RUN_DIR;
    # this "checkpoints" directory is kept only for backward compatibility — confirm
    # whether anything still reads from it.
    os.makedirs("checkpoints", exist_ok=True)
    print("\n모델 훈련을 시작합니다...")
    trained_model = train_model(
        model, train_loader, val_loader, criterion, optimizer, DEVICE,
        num_epochs=NUM_EPOCHS, patience=EARLY_STOPPING_PATIENCE,
        use_amp=True, amp_in_val=True, grad_clip=1.0,
        # Enable together with the progressive-unfreeze sketch above:
        # unfreeze_at=UNFREEZE_AT,
        # lr_backbone=LR_BACKBONE,
        print_metrics=['acc','macro_f1','per_class_f1','confmat'],
        save_best_path=str(BEST_CKPT_PATH)
    )
# %%

Using device: cuda
NUM_WORKERS:  4
데이터 준비 완료!
훈련 데이터셋 크기: 33664
클래스 수: 7 -> ['기쁨', '당황', '분노', '불안', '상처', '슬픔', '중립']
'resnet18' 모델, 손실 함수, 옵티마이저 준비 완료!

모델 훈련을 시작합니다...
Epoch 1/100
----------
[E1 B50] loss=1.5671 acc=0.408
[E1 B100] loss=1.4197 acc=0.479
[E1 B150] loss=1.3376 acc=0.520
[E1 B200] loss=1.2872 acc=0.546
[E1 B250] loss=1.2501 acc=0.563
Train Loss: 1.2428 Acc: 0.5670
Val Loss: 1.0881 Acc: 0.6347

Macro-F1: 0.6216
Per-class F1: 0:0.934, 1:0.707, 2:0.654, 3:0.420, 4:0.313, 5:0.638, 6:0.684
ConfMat (rows=true, cols=pred):
 [[960  16  12  10   1  11   7]
 [ 11 770  49  96  41   2  42]
 [ 19  49 653  86  61  22  73]
 [ 18 193 139 388 104  65 148]
 [  9 111 110 125 254 233 150]
 [ 13  14  47  62 143 644 116]
 [  8  14  23  24  25   4 686]]
  -> Val Loss 개선됨! (1.0881)
Epoch 2/100
----------
[E2 B50] loss=1.0427 acc=0.662
[E2 B100] loss=1.0346 acc=0.666
[E2 B150] loss=1.0328 acc=0.665
[E2 B200] loss=1.0292 acc=0.666
[E2 B250] loss=1.0303 acc=0.665
Train Loss: 1.0297 Acc: 0.6650
V

In [9]:
# ===== Re-validation + misclassified-image export + confusion matrix / per-class accuracy =====
# Reloads the best checkpoint, runs one pass over the validation set, copies every
# misclassified source image into RUN_DIR/misclassified, and writes a per-sample CSV,
# the confusion matrix (.npy) and per-class accuracy (.json).
import os, csv, json, shutil
from pathlib import Path
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
import numpy as np

from core.models.model_factory import create_model
from core.data.dataset import EmotionDataset

# 0) Paths / run folder (create RUN_DIR / BEST_CKPT_PATH if the training cell did not)
if 'RUN_DIR' not in globals():
    from datetime import datetime
    RUN_DIR = Path(f"./runs/{MODEL_NAME}_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
    RUN_DIR.mkdir(parents=True, exist_ok=True)
if 'BEST_CKPT_PATH' not in globals():
    BEST_CKPT_PATH = RUN_DIR / "best.ckpt"

MIS_DIR = RUN_DIR / "misclassified"
MIS_DIR.mkdir(parents=True, exist_ok=True)
CSV_PATH = RUN_DIR / "val_predictions.csv"
CM_NPY  = RUN_DIR / "confusion_matrix.npy"
PCA_JSON = RUN_DIR / "per_class_acc.json"

# 1) Load the model: prefer the checkpoint on disk, fall back to the in-memory trained_model
device = DEVICE
model = create_model(MODEL_NAME, NUM_CLASSES, pretrained=False).to(device)

state_loaded = False
if Path(BEST_CKPT_PATH).exists():
    # NOTE: torch.load unpickles the file — only load checkpoints produced by your own runs.
    ckpt = torch.load(BEST_CKPT_PATH, map_location=device)
    # Layout noted by the author for trainer_v2: {'model': state_dict, 'epoch': int, 'val_loss': float}.
    # The training cell imports trainer_v3 — TODO confirm it saves the same layout.
    # A bare state_dict is accepted as well.
    sd = ckpt['model'] if isinstance(ckpt, dict) and 'model' in ckpt else ckpt
    model.load_state_dict(sd)
    state_loaded = True
elif 'trained_model' in globals():
    model.load_state_dict(trained_model.state_dict())
    state_loaded = True

if not state_loaded:
    print("⚠️ 체크포인트/훈련모델이 없어 현재 파라미터로 검증합니다.")

model.eval()

# 2) Validation loader: reuse the existing one, otherwise build it
if 'val_loader' not in globals():
    val_dataset = EmotionDataset(DATA_DIR / "val", transform=val_transform)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
else:
    val_dataset = val_loader.dataset

label_names = getattr(val_dataset, 'classes', [str(i) for i in range(NUM_CLASSES)])

# 3) Single validation pass: accuracy, confusion matrix, misclassified copies, CSV rows
total, correct = 0, 0
rows = []  # one dict per validation sample, written to CSV below
# Accumulated directly from class indices (rows = true, cols = predicted).
cm = np.zeros((NUM_CLASSES, NUM_CLASSES), dtype=int)
# Running count of samples already seen. Precondition: the loader is not shuffled,
# so the n-th sample produced corresponds to val_dataset.image_paths[n].
offset = 0

with torch.no_grad():
    for images, targets in val_loader:
        # non_blocking only has an effect for pinned host tensors and is harmless otherwise.
        images = images.to(device, non_blocking=True)
        targets = targets.to(device)
        logits = model(images)
        probs  = F.softmax(logits, dim=1)
        confs, preds = probs.max(dim=1)

        batch_len = targets.size(0)
        correct += (preds == targets).sum().item()
        total   += batch_len

        # One device-to-host transfer per tensor instead of one .item() per element.
        true_list = targets.tolist()
        pred_list = preds.tolist()
        conf_list = confs.tolist()

        for i, (true_idx, pred_idx, conf) in enumerate(zip(true_list, pred_list, conf_list)):
            gidx = offset + i
            is_correct = pred_idx == true_idx
            cm[true_idx, pred_idx] += 1

            img_path = Path(val_dataset.image_paths[gidx])  # original file on disk
            true_nm, pred_nm = label_names[true_idx], label_names[pred_idx]

            # Copy misclassified originals; the index prefix keeps file names unique.
            if not is_correct:
                dst = MIS_DIR / f"{gidx:06d}_true-{true_nm}_pred-{pred_nm}_{conf:.2f}{img_path.suffix}"
                try:
                    shutil.copy2(img_path, dst)
                except Exception as e:
                    # Best effort: report the failed copy and continue evaluating.
                    print(f"copy fail: {img_path} -> {dst} ({e})")

            rows.append({
                "index": gidx,
                "path": str(img_path),
                "true": true_nm,
                "pred": pred_nm,
                "conf": conf,
                "is_correct": int(is_correct),
            })

        offset += batch_len

acc = correct / max(1, total)
print(f"[Re-Eval] Val Acc: {acc:.4f}  ({correct}/{total})")

# Save the per-sample CSV
with open(CSV_PATH, "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["index","path","true","pred","conf","is_correct"])
    w.writeheader()
    w.writerows(rows)
print(f"→ CSV 저장: {CSV_PATH}")
print(f"→ 오분류 이미지 폴더: {MIS_DIR}")

# 4) Save the confusion matrix and per-class accuracy
np.save(CM_NPY, cm)

per_class_acc = {}
for i, name in enumerate(label_names):
    denom = int(cm[i, :].sum())
    # Classes with no validation samples get 0.0 rather than a division by zero.
    per_class_acc[name] = float(cm[i, i] / denom) if denom > 0 else 0.0

with open(PCA_JSON, "w", encoding="utf-8") as f:
    json.dump(per_class_acc, f, ensure_ascii=False, indent=2)

print("클래스별 acc:")
for i, name in enumerate(label_names):
    print(f"  {name:>4s}: {per_class_acc[name]:.3f}  (correct {cm[i, i]}/{cm[i, :].sum()})")
print(f"→ 혼동행렬 NPY: {CM_NPY}")
print(f"→ 클래스별 acc JSON: {PCA_JSON}")


[Re-Eval] Val Acc: 0.6543  (4489/6861)
→ CSV 저장: runs\resnet18_20250818_030701\val_predictions.csv
→ 오분류 이미지 폴더: runs\resnet18_20250818_030701\misclassified
클래스별 acc:
    기쁨: 0.978  (correct 995/1017)
    당황: 0.716  (correct 724/1011)
    분노: 0.704  (correct 678/963)
    불안: 0.395  (correct 417/1055)
    상처: 0.445  (correct 441/992)
    슬픔: 0.574  (correct 596/1039)
    중립: 0.814  (correct 638/784)
→ 혼동행렬 NPY: runs\resnet18_20250818_030701\confusion_matrix.npy
→ 클래스별 acc JSON: runs\resnet18_20250818_030701\per_class_acc.json
