In [19]:
import os, random
from pathlib import Path
from collections import Counter

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torchvision.transforms import InterpolationMode, functional as TF

import timm
from timm.data import resolve_model_data_config
from sklearn.metrics import confusion_matrix
from transformers import get_cosine_schedule_with_warmup

##### LT_slit

In [105]:
# ---------- config ----------
DATA_DIR = Path("/camin1/chlee/Data/cornea_image/segmented/Lt/slit/split")  # <- 본인 경로로 변경
TRAIN_DIR    = DATA_DIR / "train"
VAL_DIR      = DATA_DIR / "val"

NUM_CLASSES  = 4
TARGET_SIZE  = 384                     # 모델 권장 입력
BATCH_SIZE   = 8                       # OOM 나면 줄이세요
EPOCHS       = 80
LR_HEAD      = 3e-4
LR_BACKBONE  = 1e-5
WEIGHT_DECAY = 0.05
UNFREEZE_STAGES = 2                    # 마지막 stage 2개 학습(총 4개 중 3,4)
NUM_WORKERS  = 4
SEED         = 42


In [106]:
random.seed(SEED); torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)

In [107]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [108]:

# ---------- model ----------
MODEL_ID = "convnextv2_base.fcmae_ft_in22k_in1k_384"
model = timm.create_model(MODEL_ID, pretrained=True, num_classes=NUM_CLASSES)

In [109]:

# 안전하게 분류기 리셋
if hasattr(model, "reset_classifier"):
    model.reset_classifier(num_classes=NUM_CLASSES)
model.to(DEVICE)


ConvNeXt(
  (stem): Sequential(
    (0): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
    (1): LayerNorm2d((128,), eps=1e-06, elementwise_affine=True)
  )
  (stages): Sequential(
    (0): ConvNeXtStage(
      (downsample): Identity()
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(128, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=128)
          (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
          (mlp): GlobalResponseNormMlp(
            (fc1): Linear(in_features=128, out_features=512, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (grn): GlobalResponseNorm()
            (fc2): Linear(in_features=512, out_features=128, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): Identity()
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(128, 128, kernel_size=(7, 7), strid

In [110]:

# ---------- transforms (LongestSide + square pad) ----------
cfg = resolve_model_data_config(model)  # mean/std


In [111]:

class ResizeLongestSide:
    def __init__(self, size, interpolation=InterpolationMode.BICUBIC):
        self.size = size; self.interp = interpolation
    def __call__(self, img):
        w, h = img.size
        scale = self.size / max(w, h)
        new_w, new_h = int(round(w*scale)), int(round(h*scale))
        return TF.resize(img, (new_h, new_w), interpolation=self.interp, antialias=True)

class PadToSquare:
    def __init__(self, size, fill=(0,0,0)):
        self.size = size; self.fill = fill
    def __call__(self, img):
        w, h = img.size
        pw, ph = self.size - w, self.size - h
        pad = (pw//2, ph//2, pw - pw//2, ph - ph//2)
        return TF.pad(img, pad, fill=self.fill)


In [112]:

pad_fill = tuple(int(m*255) for m in cfg['mean'])  # 배경을 mean 색으로 패딩

train_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    # 의료: 강한 크롭/플립 제거, 약한 회전만
    transforms.RandomApply([transforms.RandomRotation(5, interpolation=InterpolationMode.BILINEAR)], p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])

val_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])


In [113]:

# ---------- datasets / loaders ----------
train_ds = ImageFolder(TRAIN_DIR, transform=train_tf)
val_ds   = ImageFolder(VAL_DIR,   transform=val_tf)


In [114]:

print("train classes:", train_ds.class_to_idx)
print("val classes:",   val_ds.class_to_idx)
print("train counts:",  Counter(train_ds.targets))
print("val counts:",    Counter(val_ds.targets))


train classes: {'0': 0, '1': 1, '2': 2, '3': 3}
val classes: {'0': 0, '1': 1, '2': 2, '3': 3}
train counts: Counter({2: 78, 0: 75, 3: 45, 1: 11})
val counts: Counter({2: 56, 0: 45, 3: 27, 1: 14})


In [115]:
targets = torch.tensor(train_ds.targets)
class_counts = torch.bincount(targets, minlength=NUM_CLASSES).float()
# 각 클래스 반비례 가중치(총합 정규화)
weights = (class_counts.sum() / (class_counts + 1e-6)).to(DEVICE)

criterion = nn.CrossEntropyLoss(
    weight=weights,
    label_smoothing=0.1,  # 작은 데이터에 도움
).to(DEVICE)

train_loader = DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True
)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)

In [116]:

# ---------- partial fine-tune (head + last stages) ----------
for p in model.parameters():
    p.requires_grad = False


In [117]:

# head 활성화
if hasattr(model, "head"):
    for p in model.head.parameters():
        p.requires_grad = True
# 마지막 N개 stage 활성화
if hasattr(model, "stages"):
    total = len(model.stages)
    start = max(0, total - UNFREEZE_STAGES)
    for i in range(start, total):
        for p in model.stages[i].parameters():
            p.requires_grad = True


In [118]:

# 파라미터 그룹 (head vs backbone)
head_params, backbone_params = [], []
for n, p in model.named_parameters():
    if p.requires_grad:
        if ("head" in n) or ("classifier" in n):
            head_params.append(p)
        else:
            backbone_params.append(p)


In [119]:
UNFREEZE_STAGES = 4  # 전부
for p in model.parameters(): p.requires_grad = False
for p in model.head.parameters(): p.requires_grad = True
for i in range(len(model.stages) - UNFREEZE_STAGES, len(model.stages)):
    for p in model.stages[i].parameters():
        p.requires_grad = True

optimizer = torch.optim.AdamW([
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and "head" not in n], "lr": 5e-5},
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and ("head" in n or "classifier" in n)], "lr": 3e-4},
], weight_decay=0.01)



num_train_steps = EPOCHS * len(train_loader)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * num_train_steps),
    num_training_steps=num_train_steps
)

criterion = nn.CrossEntropyLoss().to(DEVICE)
scaler = torch.amp.GradScaler(enabled=(DEVICE=="cuda"))


In [120]:

# ---------- utils ----------
@torch.no_grad()
def evaluate():
    model.eval()
    total, correct, total_loss = 0, 0, 0.0
    for x, y in val_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        logits = model(x)
        total_loss += criterion(logits, y).item() * x.size(0)
        pred = logits.argmax(1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    return total_loss / max(total, 1), correct / max(total, 1)


def eval_confmat():
    model.eval()
    ys, ps = [], []
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(DEVICE)
            ps.extend(model(x).argmax(1).cpu().tolist())
            ys.extend(y.tolist())
    cm = confusion_matrix(ys, ps, labels=list(range(NUM_CLASSES)))
    print("Confusion Matrix:\n", cm)

def pred_hist():
    model.eval()
    ps = []
    with torch.no_grad():
        for x, _ in val_loader:
            x = x.to(DEVICE)
            ps.extend(model(x).argmax(1).cpu().tolist())
    hist = torch.bincount(torch.tensor(ps), minlength=NUM_CLASSES).tolist()
    print("pred hist:", hist)


In [121]:

# ---------- training ----------
print("trainable params:", sum(p.numel() for p in model.parameters() if p.requires_grad))


trainable params: 87690372


In [122]:
# ==== EarlyStopping 파라미터 ====
best_loss = float('inf')
patience = 15         # 권장: 10~15
min_delta = 1e-3      # 이만큼 내려가야 “개선”으로 인정
no_improve = 0

for ep in range(1, EPOCHS + 1):
    model.train()
    running = 0.0
    for step, (x, y) in enumerate(train_loader, 1):
        x, y = x.to(DEVICE), y.to(DEVICE)
        with torch.amp.autocast(device_type=DEVICE, enabled=(DEVICE=="cuda")):
            logits = model(x)
            loss = criterion(logits, y)

        optimizer.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # per-step 스케줄 유지

        running += loss.item()
        if step % 50 == 0:
            print(f"ep {ep} | step {step}/{len(train_loader)} | train_loss {running/50:.4f}")
            running = 0.0

    val_loss, val_acc = evaluate()
    print(f"[{ep}] val_loss={val_loss:.4f}  acc={val_acc:.4f}")

    # 저장 & EarlyStopping 로직
    if best_loss - val_loss > min_delta:
        best_loss = val_loss
        no_improve = 0
        # torch.save(model.state_dict(), "/camin1/chlee/jupyter/Keratopathy AI Project/[25-08-11]/convnextv2_base_384_cornea_best.pt")
        # print("💾 saved: convnextv2_base_384_cornea_best.pt")
    else:
        no_improve += 1
        if no_improve >= patience:
            print(f"⏹ Early stopped at epoch {ep} (no improvement for {patience} epochs).")
            break



[1] val_loss=1.2923  acc=0.3732
[2] val_loss=1.3290  acc=0.3451
[3] val_loss=1.3058  acc=0.3521
[4] val_loss=1.2301  acc=0.4296
[5] val_loss=1.3959  acc=0.4085
[6] val_loss=1.7480  acc=0.4366
[7] val_loss=1.8425  acc=0.4789
[8] val_loss=2.6416  acc=0.3662
[9] val_loss=2.7400  acc=0.2817
[10] val_loss=2.0852  acc=0.3944
[11] val_loss=1.8593  acc=0.3873
[12] val_loss=2.9054  acc=0.4155
[13] val_loss=2.7738  acc=0.4296
[14] val_loss=2.2576  acc=0.3944
[15] val_loss=2.5976  acc=0.3944
[16] val_loss=2.6085  acc=0.4296
[17] val_loss=2.5613  acc=0.4296
[18] val_loss=2.7515  acc=0.4789
[19] val_loss=3.5832  acc=0.3944
⏹ Early stopped at epoch 19 (no improvement for 15 epochs).


##### LT_slit_beam

In [124]:
DATA_DIR = Path("/camin1/chlee/Data/cornea_image/segmented/Lt/slit_beam/split")  # <- 본인 경로로 변경
TRAIN_DIR    = DATA_DIR / "train"
VAL_DIR      = DATA_DIR / "val"

model = timm.create_model(MODEL_ID, pretrained=True, num_classes=NUM_CLASSES)

# 안전하게 분류기 리셋
if hasattr(model, "reset_classifier"):
    model.reset_classifier(num_classes=NUM_CLASSES)
model.to(DEVICE)


# ---------- transforms (LongestSide + square pad) ----------
cfg = resolve_model_data_config(model)  # mean/std


pad_fill = tuple(int(m*255) for m in cfg['mean'])  # 배경을 mean 색으로 패딩

train_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    # 의료: 강한 크롭/플립 제거, 약한 회전만
    transforms.RandomApply([transforms.RandomRotation(5, interpolation=InterpolationMode.BILINEAR)], p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])

val_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])


# ---------- datasets / loaders ----------
train_ds = ImageFolder(TRAIN_DIR, transform=train_tf)
val_ds   = ImageFolder(VAL_DIR,   transform=val_tf)


print("train classes:", train_ds.class_to_idx)
print("val classes:",   val_ds.class_to_idx)
print("train counts:",  Counter(train_ds.targets))
print("val counts:",    Counter(val_ds.targets))


train classes: {'0': 0, '1': 1, '2': 2, '3': 3}
val classes: {'0': 0, '1': 1, '2': 2, '3': 3}
train counts: Counter({2: 63, 0: 59, 3: 43, 1: 9})
val counts: Counter({2: 49, 0: 41, 3: 22, 1: 12})


In [125]:
targets = torch.tensor(train_ds.targets)
class_counts = torch.bincount(targets, minlength=NUM_CLASSES).float()
# 각 클래스 반비례 가중치(총합 정규화)
weights = (class_counts.sum() / (class_counts + 1e-6)).to(DEVICE)

criterion = nn.CrossEntropyLoss(
    weight=weights,
    label_smoothing=0.1,  # 작은 데이터에 도움
).to(DEVICE)

train_loader = DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True
)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)


# ---------- partial fine-tune (head + last stages) ----------
for p in model.parameters():
    p.requires_grad = False


# head 활성화
if hasattr(model, "head"):
    for p in model.head.parameters():
        p.requires_grad = True
# 마지막 N개 stage 활성화
if hasattr(model, "stages"):
    total = len(model.stages)
    start = max(0, total - UNFREEZE_STAGES)
    for i in range(start, total):
        for p in model.stages[i].parameters():
            p.requires_grad = True


In [126]:


# 파라미터 그룹 (head vs backbone)
head_params, backbone_params = [], []
for n, p in model.named_parameters():
    if p.requires_grad:
        if ("head" in n) or ("classifier" in n):
            head_params.append(p)
        else:
            backbone_params.append(p)


UNFREEZE_STAGES = 4  # 전부
for p in model.parameters(): p.requires_grad = False
for p in model.head.parameters(): p.requires_grad = True
for i in range(len(model.stages) - UNFREEZE_STAGES, len(model.stages)):
    for p in model.stages[i].parameters():
        p.requires_grad = True

optimizer = torch.optim.AdamW([
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and "head" not in n], "lr": 5e-5},
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and ("head" in n or "classifier" in n)], "lr": 3e-4},
], weight_decay=0.01)




num_train_steps = EPOCHS * len(train_loader)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * num_train_steps),
    num_training_steps=num_train_steps
)
criterion = nn.CrossEntropyLoss().to(DEVICE)
scaler = torch.amp.GradScaler(enabled=(DEVICE=="cuda"))


In [127]:

# ---------- training ----------
print("trainable params:", sum(p.numel() for p in model.parameters() if p.requires_grad))

trainable params: 87690372


In [128]:
# ==== EarlyStopping 파라미터 ====
best_loss = float('inf')
patience = 15         # 권장: 10~15
min_delta = 1e-3      # 이만큼 내려가야 “개선”으로 인정
no_improve = 0

for ep in range(1, EPOCHS + 1):
    model.train()
    running = 0.0
    for step, (x, y) in enumerate(train_loader, 1):
        x, y = x.to(DEVICE), y.to(DEVICE)
        with torch.amp.autocast(device_type=DEVICE, enabled=(DEVICE=="cuda")):
            logits = model(x)
            loss = criterion(logits, y)

        optimizer.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # per-step 스케줄 유지

        running += loss.item()
        if step % 50 == 0:
            print(f"ep {ep} | step {step}/{len(train_loader)} | train_loss {running/50:.4f}")
            running = 0.0

    val_loss, val_acc = evaluate()
    print(f"[{ep}] val_loss={val_loss:.4f}  acc={val_acc:.4f}")

    # 저장 & EarlyStopping 로직
    if best_loss - val_loss > min_delta:
        best_loss = val_loss
        no_improve = 0
        # torch.save(model.state_dict(), "/camin1/chlee/jupyter/Keratopathy AI Project/[25-08-11]/convnextv2_base_384_cornea_best.pt")
        # print("💾 saved: convnextv2_base_384_cornea_best.pt")
    else:
        no_improve += 1
        if no_improve >= patience:
            print(f"⏹ Early stopped at epoch {ep} (no improvement for {patience} epochs).")
            break

[1] val_loss=1.2860  acc=0.3871
[2] val_loss=1.3134  acc=0.3629
[3] val_loss=1.5284  acc=0.2177
[4] val_loss=1.2173  acc=0.5000
[5] val_loss=1.3448  acc=0.4597
[6] val_loss=1.4868  acc=0.4274
[7] val_loss=1.5112  acc=0.4516
[8] val_loss=1.7897  acc=0.4839
[9] val_loss=2.5653  acc=0.4032
[10] val_loss=2.6663  acc=0.4435
[11] val_loss=1.5744  acc=0.5242
[12] val_loss=2.2448  acc=0.4597
[13] val_loss=2.2924  acc=0.3629
[14] val_loss=2.0887  acc=0.4677
[15] val_loss=2.5199  acc=0.5000
[16] val_loss=2.0806  acc=0.5081
[17] val_loss=2.6700  acc=0.4355
[18] val_loss=2.5598  acc=0.4839
[19] val_loss=2.3762  acc=0.5161
⏹ Early stopped at epoch 19 (no improvement for 15 epochs).


##### RT_slit

In [129]:
DATA_DIR = Path("/camin1/chlee/Data/cornea_image/segmented/Rt/slit/split")  # <- 본인 경로로 변경
TRAIN_DIR    = DATA_DIR / "train"
VAL_DIR      = DATA_DIR / "val"

model = timm.create_model(MODEL_ID, pretrained=True, num_classes=NUM_CLASSES)

# 안전하게 분류기 리셋
if hasattr(model, "reset_classifier"):
    model.reset_classifier(num_classes=NUM_CLASSES)
model.to(DEVICE)


# ---------- transforms (LongestSide + square pad) ----------
cfg = resolve_model_data_config(model)  # mean/std


pad_fill = tuple(int(m*255) for m in cfg['mean'])  # 배경을 mean 색으로 패딩

train_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    # 의료: 강한 크롭/플립 제거, 약한 회전만
    transforms.RandomApply([transforms.RandomRotation(5, interpolation=InterpolationMode.BILINEAR)], p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])

val_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])


# ---------- datasets / loaders ----------
train_ds = ImageFolder(TRAIN_DIR, transform=train_tf)
val_ds   = ImageFolder(VAL_DIR,   transform=val_tf)


print("train classes:", train_ds.class_to_idx)
print("val classes:",   val_ds.class_to_idx)
print("train counts:",  Counter(train_ds.targets))
print("val counts:",    Counter(val_ds.targets))


train classes: {'0': 0, '1': 1, '2': 2, '3': 3}
val classes: {'0': 0, '1': 1, '2': 2, '3': 3}
train counts: Counter({2: 79, 0: 74, 3: 47, 1: 12})
val counts: Counter({2: 55, 0: 45, 3: 27, 1: 13})


In [130]:
targets = torch.tensor(train_ds.targets)
class_counts = torch.bincount(targets, minlength=NUM_CLASSES).float()
# 각 클래스 반비례 가중치(총합 정규화)
weights = (class_counts.sum() / (class_counts + 1e-6)).to(DEVICE)

criterion = nn.CrossEntropyLoss(
    weight=weights,
    label_smoothing=0.1,  # 작은 데이터에 도움
).to(DEVICE)

train_loader = DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True
)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)


# ---------- partial fine-tune (head + last stages) ----------
for p in model.parameters():
    p.requires_grad = False


# head 활성화
if hasattr(model, "head"):
    for p in model.head.parameters():
        p.requires_grad = True
# 마지막 N개 stage 활성화
if hasattr(model, "stages"):
    total = len(model.stages)
    start = max(0, total - UNFREEZE_STAGES)
    for i in range(start, total):
        for p in model.stages[i].parameters():
            p.requires_grad = True


In [131]:


# 파라미터 그룹 (head vs backbone)
head_params, backbone_params = [], []
for n, p in model.named_parameters():
    if p.requires_grad:
        if ("head" in n) or ("classifier" in n):
            head_params.append(p)
        else:
            backbone_params.append(p)


UNFREEZE_STAGES = 4  # 전부
for p in model.parameters(): p.requires_grad = False
for p in model.head.parameters(): p.requires_grad = True
for i in range(len(model.stages) - UNFREEZE_STAGES, len(model.stages)):
    for p in model.stages[i].parameters():
        p.requires_grad = True

optimizer = torch.optim.AdamW([
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and "head" not in n], "lr": 5e-5},
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and ("head" in n or "classifier" in n)], "lr": 3e-4},
], weight_decay=0.01)




num_train_steps = EPOCHS * len(train_loader)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * num_train_steps),
    num_training_steps=num_train_steps
)
criterion = nn.CrossEntropyLoss().to(DEVICE)
scaler = torch.amp.GradScaler(enabled=(DEVICE=="cuda"))


In [132]:

# ---------- training ----------
print("trainable params:", sum(p.numel() for p in model.parameters() if p.requires_grad))

trainable params: 87690372


In [133]:
# ==== EarlyStopping 파라미터 ====
best_loss = float('inf')
patience = 15         # 권장: 10~15
min_delta = 1e-3      # 이만큼 내려가야 “개선”으로 인정
no_improve = 0

for ep in range(1, EPOCHS + 1):
    model.train()
    running = 0.0
    for step, (x, y) in enumerate(train_loader, 1):
        x, y = x.to(DEVICE), y.to(DEVICE)
        with torch.amp.autocast(device_type=DEVICE, enabled=(DEVICE=="cuda")):
            logits = model(x)
            loss = criterion(logits, y)

        optimizer.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # per-step 스케줄 유지

        running += loss.item()
        if step % 50 == 0:
            print(f"ep {ep} | step {step}/{len(train_loader)} | train_loss {running/50:.4f}")
            running = 0.0

    val_loss, val_acc = evaluate()
    print(f"[{ep}] val_loss={val_loss:.4f}  acc={val_acc:.4f}")

    # 저장 & EarlyStopping 로직
    if best_loss - val_loss > min_delta:
        best_loss = val_loss
        no_improve = 0
        # torch.save(model.state_dict(), "/camin1/chlee/jupyter/Keratopathy AI Project/[25-08-11]/convnextv2_base_384_cornea_best.pt")
        # print("💾 saved: convnextv2_base_384_cornea_best.pt")
    else:
        no_improve += 1
        if no_improve >= patience:
            print(f"⏹ Early stopped at epoch {ep} (no improvement for {patience} epochs).")
            break

[1] val_loss=1.2960  acc=0.3929
[2] val_loss=1.3192  acc=0.3143
[3] val_loss=1.3245  acc=0.4000
[4] val_loss=1.2496  acc=0.4071
[5] val_loss=1.6755  acc=0.3357
[6] val_loss=1.4449  acc=0.4286
[7] val_loss=1.8047  acc=0.4429
[8] val_loss=1.9421  acc=0.3357
[9] val_loss=1.9782  acc=0.4643
[10] val_loss=2.5735  acc=0.4000
[11] val_loss=1.8575  acc=0.4286
[12] val_loss=2.4209  acc=0.3929
[13] val_loss=2.5187  acc=0.4071
[14] val_loss=2.5972  acc=0.4357
[15] val_loss=2.9844  acc=0.3929
[16] val_loss=3.0898  acc=0.3643
[17] val_loss=2.6993  acc=0.4357
[18] val_loss=2.8806  acc=0.4214
[19] val_loss=2.9906  acc=0.3857
⏹ Early stopped at epoch 19 (no improvement for 15 epochs).


##### RT_slit_beam

In [134]:
DATA_DIR = Path("/camin1/chlee/Data/cornea_image/segmented/Rt/slit_beam/split")  # <- 본인 경로로 변경
TRAIN_DIR    = DATA_DIR / "train"
VAL_DIR      = DATA_DIR / "val"

model = timm.create_model(MODEL_ID, pretrained=True, num_classes=NUM_CLASSES)

# 안전하게 분류기 리셋
if hasattr(model, "reset_classifier"):
    model.reset_classifier(num_classes=NUM_CLASSES)
model.to(DEVICE)


# ---------- transforms (LongestSide + square pad) ----------
cfg = resolve_model_data_config(model)  # mean/std


pad_fill = tuple(int(m*255) for m in cfg['mean'])  # 배경을 mean 색으로 패딩

train_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    # 의료: 강한 크롭/플립 제거, 약한 회전만
    transforms.RandomApply([transforms.RandomRotation(5, interpolation=InterpolationMode.BILINEAR)], p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])

val_tf = transforms.Compose([
    ResizeLongestSide(TARGET_SIZE),
    PadToSquare(TARGET_SIZE, fill=pad_fill),
    transforms.ToTensor(),
    transforms.Normalize(mean=cfg['mean'], std=cfg['std']),
])


# ---------- datasets / loaders ----------
train_ds = ImageFolder(TRAIN_DIR, transform=train_tf)
val_ds   = ImageFolder(VAL_DIR,   transform=val_tf)


print("train classes:", train_ds.class_to_idx)
print("val classes:",   val_ds.class_to_idx)
print("train counts:",  Counter(train_ds.targets))
print("val counts:",    Counter(val_ds.targets))


train classes: {'0': 0, '1': 1, '2': 2, '3': 3}
val classes: {'0': 0, '1': 1, '2': 2, '3': 3}
train counts: Counter({2: 63, 0: 60, 3: 43, 1: 10})
val counts: Counter({2: 49, 0: 41, 3: 22, 1: 12})


In [135]:
targets = torch.tensor(train_ds.targets)
class_counts = torch.bincount(targets, minlength=NUM_CLASSES).float()
# 각 클래스 반비례 가중치(총합 정규화)
weights = (class_counts.sum() / (class_counts + 1e-6)).to(DEVICE)

criterion = nn.CrossEntropyLoss(
    weight=weights,
    label_smoothing=0.1,  # 작은 데이터에 도움
).to(DEVICE)

train_loader = DataLoader(
    train_ds, batch_size=BATCH_SIZE, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True
)
val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True)


# ---------- partial fine-tune (head + last stages) ----------
for p in model.parameters():
    p.requires_grad = False


# head 활성화
if hasattr(model, "head"):
    for p in model.head.parameters():
        p.requires_grad = True
# 마지막 N개 stage 활성화
if hasattr(model, "stages"):
    total = len(model.stages)
    start = max(0, total - UNFREEZE_STAGES)
    for i in range(start, total):
        for p in model.stages[i].parameters():
            p.requires_grad = True


In [136]:


# 파라미터 그룹 (head vs backbone)
head_params, backbone_params = [], []
for n, p in model.named_parameters():
    if p.requires_grad:
        if ("head" in n) or ("classifier" in n):
            head_params.append(p)
        else:
            backbone_params.append(p)


UNFREEZE_STAGES = 4  # 전부
for p in model.parameters(): p.requires_grad = False
for p in model.head.parameters(): p.requires_grad = True
for i in range(len(model.stages) - UNFREEZE_STAGES, len(model.stages)):
    for p in model.stages[i].parameters():
        p.requires_grad = True

optimizer = torch.optim.AdamW([
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and "head" not in n], "lr": 5e-5},
    {"params": [p for n,p in model.named_parameters() if p.requires_grad and ("head" in n or "classifier" in n)], "lr": 3e-4},
], weight_decay=0.01)


num_train_steps = EPOCHS * len(train_loader)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * num_train_steps),
    num_training_steps=num_train_steps
)
criterion = nn.CrossEntropyLoss().to(DEVICE)
scaler = torch.amp.GradScaler(enabled=(DEVICE=="cuda"))


In [137]:

# ---------- training ----------
print("trainable params:", sum(p.numel() for p in model.parameters() if p.requires_grad))

trainable params: 87690372


In [138]:
# ==== EarlyStopping 파라미터 ====
best_loss = float('inf')
patience = 15         # 권장: 10~15
min_delta = 1e-3      # 이만큼 내려가야 “개선”으로 인정
no_improve = 0

for ep in range(1, EPOCHS + 1):
    model.train()
    running = 0.0
    for step, (x, y) in enumerate(train_loader, 1):
        x, y = x.to(DEVICE), y.to(DEVICE)
        with torch.amp.autocast(device_type=DEVICE, enabled=(DEVICE=="cuda")):
            logits = model(x)
            loss = criterion(logits, y)

        optimizer.zero_grad(set_to_none=True)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()  # per-step 스케줄 유지

        running += loss.item()
        if step % 50 == 0:
            print(f"ep {ep} | step {step}/{len(train_loader)} | train_loss {running/50:.4f}")
            running = 0.0

    val_loss, val_acc = evaluate()
    print(f"[{ep}] val_loss={val_loss:.4f}  acc={val_acc:.4f}")

    # 저장 & EarlyStopping 로직
    if best_loss - val_loss > min_delta:
        best_loss = val_loss
        no_improve = 0
        # torch.save(model.state_dict(), "/camin1/chlee/jupyter/Keratopathy AI Project/[25-08-11]/convnextv2_base_384_cornea_best.pt")
        # print("💾 saved: convnextv2_base_384_cornea_best.pt")
    else:
        no_improve += 1
        if no_improve >= patience:
            print(f"⏹ Early stopped at epoch {ep} (no improvement for {patience} epochs).")
            break



[1] val_loss=1.2931  acc=0.2661
[2] val_loss=1.3154  acc=0.3306
[3] val_loss=1.1804  acc=0.4839
[4] val_loss=1.2042  acc=0.4597
[5] val_loss=1.4347  acc=0.3790
[6] val_loss=1.3659  acc=0.5161
[7] val_loss=1.6286  acc=0.4274
[8] val_loss=1.6547  acc=0.5403
[9] val_loss=2.1419  acc=0.5323
[10] val_loss=2.4690  acc=0.4597
[11] val_loss=1.9957  acc=0.4839
[12] val_loss=1.9168  acc=0.5000
[13] val_loss=2.1385  acc=0.5726
[14] val_loss=2.0271  acc=0.4597
[15] val_loss=2.3311  acc=0.4839
[16] val_loss=2.8440  acc=0.4194
[17] val_loss=2.0318  acc=0.5081
[18] val_loss=2.3728  acc=0.4516
⏹ Early stopped at epoch 18 (no improvement for 15 epochs).
