<a href="https://colab.research.google.com/github/Sungi-Hwang/Carclassification/blob/main/Eff8_HSG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
import gc, sys
import os

# Debug aid: makes CUDA kernel launches synchronous so an error is reported at
# the call that caused it. It slows training and should be removed once the
# failing kernel has been found. NOTE(review): it generally has to be set
# before the first CUDA call in the process to take effect - confirm.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import torch

# 1) Drop Python references to the model / tensors / data first
# del model, xb, yb, logits, loss   # every variable that may still be alive!
gc.collect()

if torch.cuda.is_available():
    # 2) Hand cached (reserved but unused) blocks back to the driver
    torch.cuda.empty_cache()          # <- this alone is usually enough

    # 3) Also clean up memory that was shared through CUDA IPC and has since
    #    been released by the other side
    torch.cuda.ipc_collect()

    # 4) Check what is still held
    print(torch.cuda.memory_allocated()/1e9,
          torch.cuda.memory_reserved()/1e9, 'GB')
else:
    # The CUDA-only calls above would fail on a CPU runtime, so skip them.
    print("CUDA is not available - no GPU memory to release")


0.734860288 6.07125504 GB


In [23]:
# # Run once only: extract the dataset archive into the Drive data folder
# !unzip -oq "/content/drive/MyDrive/Colab Notebooks/open.zip" \
#        -d "/content/drive/MyDrive/Dacon/"


In [24]:
# ───────────────────── 1. IMPORT & 기본 설정 ─────────────────────
import os, random, numpy as np, torch, gc
import torch.nn as nn, torch.nn.functional as F
from torch.amp import autocast, GradScaler
from torch.utils.data import Dataset, DataLoader, Subset
from torchvision import transforms
from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
from pathlib import Path
from PIL import Image, ImageFile; ImageFile.LOAD_TRUNCATED_IMAGES = True
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from tqdm import tqdm
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32      = True
AMP_DTYPE = torch.bfloat16
# ● 모든 하이퍼파라미터를 한 곳에
CFG = {
    "EPOCHS"      : 15,
    "BATCH_SIZE"  : 64,        # OOM 나면 더 ↓
    "LR"          : 1e-4,
    "WEIGHT_DECAY": 1e-4,
}

# ● FP16 이 아닌 경우엔 GradScaler 효력 없음 → 자동 비활성
scaler = GradScaler(enabled = (AMP_DTYPE is torch.float16))

In [25]:
# Show the directory the kernel is currently running in
current_dir = os.getcwd()
print(current_dir)

/content


In [26]:
# ───────────────────── 2. Label normalisation helpers ─────────────────────
from collections import defaultdict

# Each pair is (canonical name, alternative spelling of the same class).
alias_pairs = [
    ("K5_3세대_하이브리드_2020_2022", "K5_하이브리드_3세대_2020_2023"),
    ("디_올뉴니로_2022_2025"      , "디_올_뉴_니로_2022_2025"),
    ("718_박스터_2017_2024"       , "박스터_718_2017_2024"),
]

# Lookup table: every alternative spelling -> canonical name, followed by
# every canonical name -> itself.
alias = {}
for _canonical, _variant in alias_pairs:
    alias[_variant] = _canonical
for _canonical, _variant in alias_pairs:
    alias[_canonical] = _canonical


def canon(lbl):
    """Return the canonical label for ``lbl``; labels without an alias pass through unchanged."""
    return alias.get(lbl, lbl)


# Reverse mapping: canonical name -> every original spelling it covers
canon_to_originals = defaultdict(list)
for _pair in alias_pairs:
    for _name in _pair:
        canon_to_originals[canon(_name)].append(_name)


In [27]:
class CustomImageDataset(Dataset):
    """Image-folder dataset whose labels are canonicalised with ``canon``.

    Every sub-directory of ``root`` is one (original) class; its ``*.jpg``
    files become samples labelled with the canonical name of that directory.

    Args:
        root: Directory containing one sub-directory per class.
        transform: Optional callable applied to the loaded RGB PIL image.
        classes: Optional iterable of canonical class names that fixes the
            label-index order (index ``i`` <-> ``classes[i]``). Duplicates are
            dropped, keeping the first occurrence. When omitted, the indices
            follow the sorted canonical names found under ``root``.

    Raises:
        ValueError: If ``classes`` is given and a directory under ``root``
            maps to a canonical name that is not part of it.
    """

    def __init__(self, root, transform=None, classes=None):
        self.samples = []  # (image_path, canonical_class)
        self.original_classes = []  # directory names as found on disk
        self.canonical_classes = []  # canonical name per directory

        for p in sorted(Path(root).iterdir()):
            if not p.is_dir():
                continue
            orig = p.name
            cls = canon(orig)
            self.original_classes.append(orig)
            self.canonical_classes.append(cls)

            # glob() yields files in arbitrary order; sort so that sample
            # indices (and any seeded split built on them) are reproducible.
            self.samples.extend((img, cls) for img in sorted(p.glob("*.jpg")))

        if classes is None:
            self.classes = sorted(set(self.canonical_classes))
        else:
            self.classes = list(dict.fromkeys(classes))
            unknown = sorted(set(self.canonical_classes) - set(self.classes))
            if unknown:
                raise ValueError(
                    f"{len(unknown)} class(es) under {root} are missing from "
                    f"`classes`: {unknown[:5]}"
                )
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}
        self.idx_to_class = {i: c for c, i in self.class_to_idx.items()}
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for the sample at ``idx``."""
        path, cls = self.samples[idx]
        # Context manager closes the file handle as soon as pixels are decoded.
        with Image.open(path) as raw:
            img = raw.convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, self.class_to_idx[cls]


In [28]:
# ───────────────────── 4. Class list & transforms ─────────────────
sub = pd.read_csv("/content/drive/MyDrive/Dacon/sample_submission.csv")
# canon() can map several submission columns onto the same label, so a plain
# list would count those classes twice. De-duplicate and sort, which is how
# CustomImageDataset builds its own class list (sorted(set(...))), so
# NUM_CLASSES matches the label range the dataset produces.
# NOTE(review): assumes the train folders and the submission columns cover the
# same set of canonical classes - confirm.
CLASS_NAMES = sorted({canon(c) for c in sub.columns[1:]})
NUM_CLASSES = len(CLASS_NAMES)

weights  = EfficientNet_V2_M_Weights.DEFAULT
preset   = weights.transforms()   # preprocessing bundled with the pretrained weights
IMG_SIZE = 480
train_tf = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.7, 1.0), antialias=True),
    transforms.RandomHorizontalFlip(),
    preset,
])
val_tf = transforms.Compose([
    transforms.Resize(int(IMG_SIZE*1.15), antialias=True),
    transforms.CenterCrop(IMG_SIZE),
    preset,
])

In [29]:
# ───────────────────── 5. Dataset & DataLoader ───────────────────
import copy

root = "/content/drive/MyDrive/Dacon/train"
ds_full   = CustomImageDataset(root, transform=train_tf)
# Shallow copy instead of a second directory scan: both views share one
# sample list, so the split indices below always refer to the same files in
# the train and the validation view (and the folder is only walked once).
ds_full_v = copy.copy(ds_full)
ds_full_v.transform = val_tf

# Fail early with a readable message instead of an out-of-range target
# surfacing later inside the loss.
if len(ds_full.classes) > NUM_CLASSES:
    raise ValueError(
        f"dataset has {len(ds_full.classes)} classes but the model head is "
        f"sized for NUM_CLASSES={NUM_CLASSES}"
    )

targets   = [lbl for _, lbl in ds_full.samples]
tr_idx, v_idx = train_test_split(
    np.arange(len(ds_full)),
    test_size=0.2,
    stratify=targets,
    random_state=42
)

# Never request more workers than the machine has cores.
n_cpu = os.cpu_count() or 1

train_loader = DataLoader(
    Subset(ds_full, tr_idx),
    batch_size = CFG["BATCH_SIZE"],     # taken from CFG
    shuffle    = True,
    num_workers=min(8, n_cpu),
    pin_memory = True,
    persistent_workers=True,
    prefetch_factor=4,
)
val_loader = DataLoader(
    Subset(ds_full_v, v_idx),
    batch_size = CFG["BATCH_SIZE"],     # taken from CFG
    shuffle    = False,
    num_workers=min(4, n_cpu),
    pin_memory = True,
    persistent_workers=True,
)


In [30]:
# ───────────────────── 6. Model, optimizer, criterion ─────────────────
model = efficientnet_v2_m(weights=weights)
in_f  = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_f, NUM_CLASSES)   # new head sized for this task

# Keep the parameters in float32 and let autocast choose the per-op precision.
# Casting the whole model to bfloat16 would make the optimizer update bf16
# weights directly, where small updates at this learning rate can be rounded
# away. The PyTorch AMP docs also advise against calling half()/bfloat16()
# on a model that runs under autocast.
model = model.to(device, memory_format=torch.channels_last)
crit = nn.CrossEntropyLoss(label_smoothing=0.1)
opt  = torch.optim.AdamW(
          model.parameters(),
          lr          = CFG["LR"],
          weight_decay= CFG["WEIGHT_DECAY"],
          fused       = (device.type == "cuda"),   # only request the fused kernel on GPU
      )


In [None]:
# ───────────────────── 7. Train / validation loop ───────────────────────
# Each epoch: one pass over train_loader, then validation log-loss/accuracy;
# the weights are saved whenever the validation log-loss improves.
best = np.inf
SAVE_PATH = "/content/drive/MyDrive/best_model.pth"
for ep in range(CFG["EPOCHS"]):
    # ---- train ----
    model.train(); tl = 0.0
    for xb, yb in tqdm(train_loader, desc=f"Ep{ep+1} Train"):
        # Inputs stay float32; autocast casts them where an op runs in AMP_DTYPE.
        xb = xb.to(device, memory_format=torch.channels_last, non_blocking=True)
        yb = yb.to(device, non_blocking=True)
        opt.zero_grad(set_to_none=True)
        with autocast(device.type, dtype=AMP_DTYPE):
            logits = model(xb)
            loss   = crit(logits, yb)

        if scaler.is_enabled():                             # float16 path (loss scaling)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
        else:                                               # bfloat16 path
            loss.backward()
            opt.step()

        tl += loss.item()

    # ---- val ----
    model.eval(); all_p, all_y = [], []; correct = total = 0
    with torch.no_grad():
        for xb, yb in tqdm(val_loader, desc=f"Ep{ep+1} Val"):
            xb = xb.to(device, memory_format=torch.channels_last, non_blocking=True)
            yb = yb.to(device, non_blocking=True)
            with autocast(device.type, dtype=AMP_DTYPE):
                out = model(xb)
            # Softmax in float32: low-precision probabilities are too coarse
            # for a log-loss metric.
            prob = F.softmax(out.float(), 1)
            pred = prob.argmax(1)
            correct += (pred == yb).sum().item(); total += yb.size(0)
            all_p.append(prob.cpu().numpy())
            all_y.append(yb.cpu().numpy())

    val_prob = np.concatenate(all_p)
    val_true = np.concatenate(all_y)
    # Pass the full label set explicitly: without it log_loss raises whenever
    # the validation labels do not cover every output column.
    logloss = log_loss(val_true, val_prob, labels=np.arange(val_prob.shape[1]))
    acc     = 100 * correct / total
    print(f"Ep{ep+1:02d}  train_loss={tl/len(train_loader):.4f}  "
          f"val_logloss={logloss:.4f}  acc={acc:.2f}%")

    if logloss < best:
        best = logloss
        torch.save(model.state_dict(), SAVE_PATH)
        print("   ✅ best_model.pth saved (improved)")

Ep1 Train:  45%|████▌     | 188/415 [01:50<02:09,  1.75it/s]

In [None]:
# Sanity check: pull one batch and confirm the labels fit the model head.
xb, yb = next(iter(train_loader))
print("✔ yb shape:", yb.shape)
print("✔ yb dtype:", yb.dtype)
print("✔ yb min:", yb.min().item())
print("✔ yb max:", yb.max().item())

# Forward the batch the same way the training loop does (autocast), but with
# no grad and in eval mode so the probe neither builds a graph nor changes
# the model's state; the previous train/eval mode is restored afterwards.
was_training = model.training
model.eval()
with torch.no_grad(), autocast(device.type, dtype=AMP_DTYPE):
    logits = model(xb.to(device, memory_format=torch.channels_last))
model.train(was_training)
print("✔ logits shape:", logits.shape)

assert yb.dtype == torch.long, "targets must be int64 class indices"
assert 0 <= yb.min().item() and yb.max().item() < logits.shape[1], \
    "a target index is outside the range of the model's output classes"
