In [1]:
# ============================================================
# train_effnet_vit_ensemble_autoweight_v7_mem.py
# 3090(24GB) 메모리 최적화 안정판
# - Stratified 5-Fold, 공통 split 고정
# - EfficientNet-B3(380), ViT-B/16(384)
# - Mild Aug, Mixup, LabelSmoothing, AMP(fp16), channels_last
# - ViT grad-checkpointing, 자동 OOM 배치 축소
# - Dataset-batched 8-way TTA (메모리 피크 최소화)
# - OOF/Test probs float16 저장
# - OCR OOF/Test 안전 정렬 + Auto-weight grid search
# ============================================================

import os, glob, json, random, math
import numpy as np
import pandas as pd
import cv2, timm, torch, albumentations as A
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader

# ----------------------------
# Repro & Device
# ----------------------------
SEED = 42
os.environ["PYTHONHASHSEED"] = str(SEED)
random.seed(SEED); np.random.seed(SEED)
torch.manual_seed(SEED); torch.cuda.manual_seed_all(SEED)
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
if hasattr(torch, "set_float32_matmul_precision"):
    torch.set_float32_matmul_precision("high")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} ({torch.cuda.get_device_name(0) if device.type=='cuda' else 'CPU'})")



Using cuda (NVIDIA GeForce RTX 3090)


In [2]:
# ----------------------------
# Paths (v6 정합)
# ----------------------------
BASE = "/data/ephemeral/home/data"
TRAIN_IMG_DIR = f"{BASE}/processed/stage0_6_train_v6/"
TEST_IMG_DIR  = f"{BASE}/processed/stage0_6_test_v6/"
TRAIN_META    = f"{BASE}/meta_stage0_6_train_v6.csv"
TRAIN_CSV     = f"{BASE}/raw/train.csv"
SUB_CSV       = f"{BASE}/raw/sample_submission.csv"

# OCR 확률 (있으면 사용)
OCR_TEST_PROBS_CSV  = f"{BASE}/interim/ocr_test_probs.csv"
OCR_VALID_PROBS_DIR = f"{BASE}/interim/ocr_valid_probs"

os.makedirs("./models", exist_ok=True)
os.makedirs("./oofs", exist_ok=True)
os.makedirs("./test_probs", exist_ok=True)

# ----------------------------
# Config
# ----------------------------
NUM_CLASSES = 17
NFOLDS = 5
EPOCHS = 30
INIT_BATCH_SIZE = 16        # 3090 기준 권장 시작값
NUM_WORKERS = 6
LR = 1e-4
WEIGHT_DECAY = 1e-4
LABEL_SMOOTH = 0.05
MIXUP_ALPHA = 0.2
MIXUP_PROB = 0.5

BACKBONES = [
    dict(name="tf_efficientnet_b3_ns", img_size=380, drop_rate=0.4, drop_path=0.2, grad_ckpt=False),
    dict(name="vit_base_patch16_384",  img_size=384, drop_rate=0.2, drop_path=0.1, grad_ckpt=True),
]

USE_TEMPERATURE = False
TEMP_T = 1.5



In [3]:
# ----------------------------
# Utils
# ----------------------------
def safe_glob(base_dir, basename):
    cands = glob.glob(os.path.join(base_dir, "**", basename), recursive=True)
    return cands[0] if cands else None

def build_transforms(img_size):
    trn = A.Compose([
        A.Resize(img_size, img_size),
        A.Rotate(limit=10, border_mode=cv2.BORDER_REFLECT_101, p=0.4),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(0.15, 0.15, p=0.4),
        A.GaussNoise(var_limit=(5, 20), p=0.3),
        A.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    tst = A.Compose([
        A.Resize(img_size, img_size),
        A.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ])
    return trn, tst

def do_mixup(images, targets, alpha=MIXUP_ALPHA):
    if alpha <= 0:
        return images, targets, torch.ones(len(targets), device=targets.device), targets
    lam = np.random.beta(alpha, alpha)
    idx = torch.randperm(images.size(0), device=images.device)
    mixed = lam * images + (1 - lam) * images[idx]
    t1, t2 = targets, targets[idx]
    lam_tensor = torch.full((images.size(0),), lam, device=targets.device, dtype=torch.float32)
    return mixed, (t1, t2), lam_tensor, idx

def mixup_criterion(criterion, preds, targets_tuple, lam_tensor):
    t1, t2 = targets_tuple
    loss1 = criterion(preds, t1)
    loss2 = criterion(preds, t2)
    return (lam_tensor * loss1 + (1 - lam_tensor) * loss2).mean()

def apply_temperature_np(probs, T=1.5):
    logits = np.log(np.clip(probs, 1e-12, 1.0))
    logits /= T
    exps = np.exp(logits - logits.max(axis=1, keepdims=True))
    return exps / exps.sum(axis=1, keepdims=True)



In [4]:
# ----------------------------
# Dataset
# ----------------------------
class ImageDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = np.array(Image.open(row["filepath"]).convert("RGB"))
        target = int(row["target"])
        if self.transform:
            img = self.transform(image=img)["image"]
        return img, target

class TestDataset(Dataset):
    def __init__(self, filepaths, transform=None):
        self.paths = filepaths; self.transform = transform
    def __len__(self): return len(self.paths)
    def __getitem__(self, idx):
        img = np.array(Image.open(self.paths[idx]).convert("RGB"))
        if self.transform:
            img = self.transform(image=img)["image"]
        return img

# ----------------------------
# Model factory
# ----------------------------
def create_model(model_name, drop_rate=0.2, drop_path=0.1, grad_ckpt=False):
    model = timm.create_model(
        model_name, pretrained=True, num_classes=NUM_CLASSES,
        in_chans=3, drop_rate=drop_rate, drop_path_rate=drop_path
    )
    if grad_ckpt and hasattr(model, "set_grad_checkpointing"):
        model.set_grad_checkpointing(True)
    model = model.to(device)
    model = model.to(memory_format=torch.channels_last)
    return model



In [5]:
# ----------------------------
# Train / Valid (자동 OOM 대처)
# ----------------------------
def train_one_epoch(loader, model, optimizer, loss_fn, scaler=None):
    model.train()
    total_loss, preds_list, targets_list = 0.0, [], []
    for images, targets in tqdm(loader, desc="Train", leave=False):
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        use_mix = (np.random.rand() < MIXUP_PROB)
        if use_mix:
            images, (t1, t2), lam_tensor, _ = do_mixup(images, targets, alpha=MIXUP_ALPHA)

        optimizer.zero_grad(set_to_none=True)
        try:
            with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=(device.type=="cuda")):
                preds = model(images)
                loss = mixup_criterion(loss_fn, preds, (t1, t2), lam_tensor) if use_mix else loss_fn(preds, targets).mean()
            if scaler and device.type=="cuda":
                scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()
            else:
                loss.backward(); optimizer.step()
        except RuntimeError as e:
            if "out of memory" in str(e).lower():
                torch.cuda.empty_cache()
                raise e
            else:
                raise e

        total_loss += loss.item()
        preds_list.extend(preds.argmax(1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

    return {"loss": total_loss/len(loader),
            "acc": accuracy_score(targets_list, preds_list),
            "f1": f1_score(targets_list, preds_list, average="macro")}

@torch.no_grad()
def validate(loader, model, loss_fn):
    model.eval()
    total_loss, preds_list, targets_list = 0.0, [], []
    all_probs = []
    for images, targets in tqdm(loader, desc="Valid", leave=False):
        images = images.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        with torch.autocast(device_type="cuda", dtype=torch.float16, enabled=(device.type=="cuda")):
            logits = model(images)
            loss = loss_fn(logits, targets).mean()
            probs = torch.softmax(logits, dim=1).to(torch.float16).cpu().numpy()
        total_loss += loss.item()
        all_probs.append(probs)
        preds_list.extend(probs.argmax(1))
        targets_list.extend(targets.cpu().numpy())
    all_probs = np.vstack(all_probs)
    return {"loss": total_loss/len(loader),
            "acc": accuracy_score(targets_list, preds_list),
            "f1": f1_score(targets_list, preds_list, average="macro"),
            "probs": all_probs, "targets": np.array(targets_list)}

def make_loader(df, transform, batch_size, shuffle, drop_last):
    ds = ImageDataset(df, transform)
    return DataLoader(ds, batch_size=batch_size, shuffle=shuffle,
                      num_workers=NUM_WORKERS, pin_memory=True,
                      drop_last=drop_last, persistent_workers=True)

def autotune_batch_size(start_bs, fn_make_loader, try_steps=(1, 0.75, 0.5, 0.25)):
    for scale in try_steps:
        bs = max(1, int(start_bs * scale))
        try:
            loader = fn_make_loader(bs)
            return loader, bs
        except RuntimeError as e:
            if "out of memory" in str(e).lower():
                torch.cuda.empty_cache()
                continue
            else:
                raise e
    raise RuntimeError("Unable to allocate dataloader without OOM.")



In [6]:
# ----------------------------
# Load & join data
# ----------------------------
meta = pd.read_csv(TRAIN_META)
train_csv = pd.read_csv(TRAIN_CSV)
if "basename" not in meta.columns:
    meta["basename"] = meta["filepath"].apply(os.path.basename)
train_csv["basename"] = train_csv["ID"].apply(lambda x: f"{x}.jpg" if not str(x).endswith(".jpg") else x)
df = pd.merge(meta, train_csv[["basename","target"]], on="basename", how="left")
df = df.dropna(subset=["target"]).reset_index(drop=True)
df["target"] = df["target"].astype(int)

# 공통 split 고정
skf = StratifiedKFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)
SPLITS = list(skf.split(df, df["target"]))



In [7]:
# ----------------------------
# Train per backbone (save OOF)
# ----------------------------
all_backbone_fold_scores = {}

for bb in BACKBONES:
    model_name = bb["name"]; img_size = bb["img_size"]
    print(f"\n========== Backbone: {model_name} ({img_size}) ==========")
    trn_tf, tst_tf = build_transforms(img_size)
    fold_scores = []

    for fold, (trn_idx, val_idx) in enumerate(SPLITS):
        print(f"\n----- {model_name} | Fold {fold+1}/{NFOLDS} -----")
        trn_df, val_df = df.iloc[trn_idx].copy(), df.iloc[val_idx].copy()

        # 파일 경로 매핑
        trn_df["filepath"] = trn_df["basename"].apply(lambda b: safe_glob(TRAIN_IMG_DIR, b))
        val_df["filepath"] = val_df["basename"].apply(lambda b: safe_glob(TRAIN_IMG_DIR, b))
        trn_df = trn_df.dropna(subset=["filepath"]).reset_index(drop=True)
        val_df = val_df.dropna(subset=["filepath"]).reset_index(drop=True)

        def _mk_tr_loader(bs):
            return make_loader(trn_df, trn_tf, bs, shuffle=True, drop_last=True)
        def _mk_va_loader(bs):
            return make_loader(val_df, tst_tf, bs, shuffle=False, drop_last=False)

        trn_loader, bs_tr = autotune_batch_size(INIT_BATCH_SIZE, _mk_tr_loader)
        val_loader, bs_va = autotune_batch_size(INIT_BATCH_SIZE, _mk_va_loader)
        print(f"[BS] Train={bs_tr}, Valid={bs_va}")

        model = create_model(model_name, bb["drop_rate"], bb["drop_path"], bb["grad_ckpt"])
        loss_fn = nn.CrossEntropyLoss(label_smoothing=LABEL_SMOOTH, reduction="none")
        optimizer = AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-6)
        scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))

        best_f1 = -1.0; best_val = None
        best_path = f"./models/{model_name}_fold{fold}.pt"
        basenames_val = val_df["basename"].tolist()

        for epoch in range(EPOCHS):
            tr = train_one_epoch(trn_loader, model, optimizer, loss_fn, scaler)
            va = validate(val_loader, model, loss_fn)
            scheduler.step()
            print(f"[{model_name}][Fold {fold}] Ep {epoch+1}/{EPOCHS} | TrF1={tr['f1']:.4f} VaF1={va['f1']:.4f}")
            if va["f1"] > best_f1:
                best_f1 = va["f1"]; best_val = va
                torch.save(model.state_dict(), best_path)
        fold_scores.append(best_f1)

        # OOF 저장 (float16)
        np.save(f"./oofs/{model_name}_fold{fold}_probs.npy", best_val["probs"].astype(np.float16))
        np.save(f"./oofs/{model_name}_fold{fold}_targets.npy", best_val["targets"])
        np.save(f"./oofs/{model_name}_fold{fold}_basenames.npy", np.array(basenames_val))

        del model; torch.cuda.empty_cache()

    all_backbone_fold_scores[model_name] = fold_scores
    print(f">>> {model_name} fold F1: {fold_scores} | mean={np.mean(fold_scores):.4f}")





----- tf_efficientnet_b3_ns | Fold 1/5 -----


  A.GaussNoise(var_limit=(5, 20), p=0.3),


[BS] Train=16, Valid=16


  model = create_fn(


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 1/30 | TrF1=0.2059 VaF1=0.6842


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 2/30 | TrF1=0.4752 VaF1=0.8357


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 3/30 | TrF1=0.5139 VaF1=0.8506


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 4/30 | TrF1=0.5993 VaF1=0.8838


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 5/30 | TrF1=0.6269 VaF1=0.8763


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 6/30 | TrF1=0.6993 VaF1=0.9029


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 7/30 | TrF1=0.6590 VaF1=0.8893


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 8/30 | TrF1=0.6806 VaF1=0.8959


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 9/30 | TrF1=0.6888 VaF1=0.9101


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 10/30 | TrF1=0.7221 VaF1=0.9023


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 11/30 | TrF1=0.6872 VaF1=0.9168


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 12/30 | TrF1=0.7499 VaF1=0.9193


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 13/30 | TrF1=0.6833 VaF1=0.9306


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 14/30 | TrF1=0.7528 VaF1=0.9300


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 15/30 | TrF1=0.6795 VaF1=0.9268


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 16/30 | TrF1=0.7479 VaF1=0.9348


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 17/30 | TrF1=0.7385 VaF1=0.9349


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 18/30 | TrF1=0.7243 VaF1=0.9384


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 19/30 | TrF1=0.6905 VaF1=0.9426


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 20/30 | TrF1=0.7216 VaF1=0.9459


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 21/30 | TrF1=0.7273 VaF1=0.9331


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 22/30 | TrF1=0.7548 VaF1=0.9407


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 23/30 | TrF1=0.7696 VaF1=0.9352


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 24/30 | TrF1=0.7634 VaF1=0.9313


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 25/30 | TrF1=0.7568 VaF1=0.9348


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 26/30 | TrF1=0.7991 VaF1=0.9327


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 27/30 | TrF1=0.6861 VaF1=0.9366


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 28/30 | TrF1=0.7568 VaF1=0.9361


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 29/30 | TrF1=0.7323 VaF1=0.9284


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 0] Ep 30/30 | TrF1=0.7665 VaF1=0.9352

----- tf_efficientnet_b3_ns | Fold 2/5 -----
[BS] Train=16, Valid=16


  model = create_fn(
  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 1/30 | TrF1=0.2176 VaF1=0.7405


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 2/30 | TrF1=0.4407 VaF1=0.8388


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 3/30 | TrF1=0.5505 VaF1=0.8664


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 4/30 | TrF1=0.5974 VaF1=0.8776


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 5/30 | TrF1=0.7001 VaF1=0.8750


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 6/30 | TrF1=0.6371 VaF1=0.8985


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 7/30 | TrF1=0.7368 VaF1=0.9091


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 8/30 | TrF1=0.6824 VaF1=0.9122


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 9/30 | TrF1=0.6713 VaF1=0.9004


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 10/30 | TrF1=0.7285 VaF1=0.9133


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 11/30 | TrF1=0.7207 VaF1=0.9208


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 12/30 | TrF1=0.7099 VaF1=0.9099


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 13/30 | TrF1=0.5927 VaF1=0.9185


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 14/30 | TrF1=0.7348 VaF1=0.9157


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 15/30 | TrF1=0.6631 VaF1=0.9162


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 16/30 | TrF1=0.7597 VaF1=0.9190


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 17/30 | TrF1=0.7209 VaF1=0.9167


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 18/30 | TrF1=0.8000 VaF1=0.9282


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 19/30 | TrF1=0.6877 VaF1=0.9217


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 20/30 | TrF1=0.7884 VaF1=0.9277


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 21/30 | TrF1=0.6577 VaF1=0.9292


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 22/30 | TrF1=0.7396 VaF1=0.9289


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 23/30 | TrF1=0.7318 VaF1=0.9372


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 24/30 | TrF1=0.6813 VaF1=0.9280


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 25/30 | TrF1=0.7446 VaF1=0.9263


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 26/30 | TrF1=0.8044 VaF1=0.9322


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 27/30 | TrF1=0.7246 VaF1=0.9319


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 28/30 | TrF1=0.7347 VaF1=0.9311


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 29/30 | TrF1=0.6730 VaF1=0.9312


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 1] Ep 30/30 | TrF1=0.7158 VaF1=0.9384

----- tf_efficientnet_b3_ns | Fold 3/5 -----
[BS] Train=16, Valid=16


  model = create_fn(
  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 1/30 | TrF1=0.1991 VaF1=0.7398


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 2/30 | TrF1=0.4452 VaF1=0.8206


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 3/30 | TrF1=0.5099 VaF1=0.8758


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 4/30 | TrF1=0.6133 VaF1=0.8769


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 5/30 | TrF1=0.6092 VaF1=0.8672


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 6/30 | TrF1=0.6632 VaF1=0.8758


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 7/30 | TrF1=0.7116 VaF1=0.9070


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 8/30 | TrF1=0.6518 VaF1=0.8995


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 9/30 | TrF1=0.7224 VaF1=0.9112


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 10/30 | TrF1=0.6636 VaF1=0.9146


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 11/30 | TrF1=0.7320 VaF1=0.9222


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 12/30 | TrF1=0.7161 VaF1=0.9263


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 13/30 | TrF1=0.6889 VaF1=0.9273


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 14/30 | TrF1=0.7512 VaF1=0.9164


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 15/30 | TrF1=0.7896 VaF1=0.9302


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 16/30 | TrF1=0.6905 VaF1=0.9308


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 17/30 | TrF1=0.8020 VaF1=0.9261


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 18/30 | TrF1=0.7381 VaF1=0.9332


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 19/30 | TrF1=0.7587 VaF1=0.9365


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 20/30 | TrF1=0.6608 VaF1=0.9365


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 21/30 | TrF1=0.7906 VaF1=0.9322


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 22/30 | TrF1=0.7233 VaF1=0.9365


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 23/30 | TrF1=0.7597 VaF1=0.9455


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 24/30 | TrF1=0.7490 VaF1=0.9363


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 25/30 | TrF1=0.7438 VaF1=0.9396


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 26/30 | TrF1=0.7166 VaF1=0.9307


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 27/30 | TrF1=0.7691 VaF1=0.9286


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 28/30 | TrF1=0.7808 VaF1=0.9282


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 29/30 | TrF1=0.7565 VaF1=0.9307


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 2] Ep 30/30 | TrF1=0.7345 VaF1=0.9368

----- tf_efficientnet_b3_ns | Fold 4/5 -----
[BS] Train=16, Valid=16


  model = create_fn(
  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 1/30 | TrF1=0.2268 VaF1=0.7155


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 2/30 | TrF1=0.4438 VaF1=0.8364


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 3/30 | TrF1=0.5392 VaF1=0.8597


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 4/30 | TrF1=0.5120 VaF1=0.8634


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 5/30 | TrF1=0.6074 VaF1=0.8877


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 6/30 | TrF1=0.6546 VaF1=0.8708


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 7/30 | TrF1=0.6569 VaF1=0.9059


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 8/30 | TrF1=0.7234 VaF1=0.8822


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 9/30 | TrF1=0.7223 VaF1=0.8974


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 10/30 | TrF1=0.7828 VaF1=0.9072


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 11/30 | TrF1=0.7082 VaF1=0.8904


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 12/30 | TrF1=0.7714 VaF1=0.9043


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 13/30 | TrF1=0.8194 VaF1=0.9020


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 14/30 | TrF1=0.7461 VaF1=0.9040


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 15/30 | TrF1=0.7870 VaF1=0.9044


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 16/30 | TrF1=0.7417 VaF1=0.9026


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 17/30 | TrF1=0.7914 VaF1=0.9210


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 18/30 | TrF1=0.7406 VaF1=0.9057


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 19/30 | TrF1=0.7498 VaF1=0.9143


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 20/30 | TrF1=0.7286 VaF1=0.9036


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 21/30 | TrF1=0.6792 VaF1=0.9164


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 22/30 | TrF1=0.7172 VaF1=0.9081


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 23/30 | TrF1=0.7115 VaF1=0.9166


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 24/30 | TrF1=0.7616 VaF1=0.9222


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 25/30 | TrF1=0.7482 VaF1=0.9169


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 26/30 | TrF1=0.7943 VaF1=0.9146


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 27/30 | TrF1=0.7720 VaF1=0.9096


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 28/30 | TrF1=0.8066 VaF1=0.9120


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 29/30 | TrF1=0.7504 VaF1=0.8986


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 3] Ep 30/30 | TrF1=0.8085 VaF1=0.9103

----- tf_efficientnet_b3_ns | Fold 5/5 -----
[BS] Train=16, Valid=16


  model = create_fn(
  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 1/30 | TrF1=0.2189 VaF1=0.6087


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 2/30 | TrF1=0.4515 VaF1=0.8215


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 3/30 | TrF1=0.5235 VaF1=0.8419


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 4/30 | TrF1=0.5872 VaF1=0.8682


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 5/30 | TrF1=0.6106 VaF1=0.8951


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 6/30 | TrF1=0.6259 VaF1=0.9343


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 7/30 | TrF1=0.7244 VaF1=0.9320


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 8/30 | TrF1=0.7031 VaF1=0.9161


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 9/30 | TrF1=0.7470 VaF1=0.9183


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 10/30 | TrF1=0.7905 VaF1=0.9267


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 11/30 | TrF1=0.7294 VaF1=0.9240


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 12/30 | TrF1=0.7221 VaF1=0.9210


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 13/30 | TrF1=0.6923 VaF1=0.9237


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 14/30 | TrF1=0.7749 VaF1=0.9242


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 15/30 | TrF1=0.7784 VaF1=0.9335


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 16/30 | TrF1=0.7737 VaF1=0.9180


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 17/30 | TrF1=0.7330 VaF1=0.9163


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 18/30 | TrF1=0.6943 VaF1=0.9141


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 19/30 | TrF1=0.7469 VaF1=0.9184


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 20/30 | TrF1=0.8145 VaF1=0.9221


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 21/30 | TrF1=0.8107 VaF1=0.9208


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 22/30 | TrF1=0.7892 VaF1=0.9039


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 23/30 | TrF1=0.7380 VaF1=0.9279


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 24/30 | TrF1=0.7599 VaF1=0.9219


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 25/30 | TrF1=0.7400 VaF1=0.9174


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 26/30 | TrF1=0.7467 VaF1=0.9188


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 27/30 | TrF1=0.7488 VaF1=0.9190


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 28/30 | TrF1=0.7027 VaF1=0.9184


                                                                                                                                                               

[tf_efficientnet_b3_ns][Fold 4] Ep 29/30 | TrF1=0.7481 VaF1=0.9257


  A.GaussNoise(var_limit=(5, 20), p=0.3),


[tf_efficientnet_b3_ns][Fold 4] Ep 30/30 | TrF1=0.8049 VaF1=0.9171
>>> tf_efficientnet_b3_ns fold F1: [0.9459136413307054, 0.9383800730230426, 0.9454683621044436, 0.9222212436341385, 0.9343481922693582] | mean=0.9373


----- vit_base_patch16_384 | Fold 1/5 -----
[BS] Train=16, Valid=16


model.safetensors:   0%|          | 0.00/347M [00:00<?, ?B/s]

  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 1/30 | TrF1=0.1415 VaF1=0.3562


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 2/30 | TrF1=0.3702 VaF1=0.6208


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 3/30 | TrF1=0.5574 VaF1=0.7771


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 4/30 | TrF1=0.5503 VaF1=0.7851


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 5/30 | TrF1=0.6638 VaF1=0.8159


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 6/30 | TrF1=0.6332 VaF1=0.8615


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 7/30 | TrF1=0.7029 VaF1=0.8317


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 8/30 | TrF1=0.6584 VaF1=0.8443


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 9/30 | TrF1=0.6368 VaF1=0.8819


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 10/30 | TrF1=0.7387 VaF1=0.8488


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 11/30 | TrF1=0.7567 VaF1=0.8788


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 12/30 | TrF1=0.7538 VaF1=0.8810


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 13/30 | TrF1=0.7522 VaF1=0.8851


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 14/30 | TrF1=0.7241 VaF1=0.8766


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 15/30 | TrF1=0.7104 VaF1=0.8687


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 16/30 | TrF1=0.7825 VaF1=0.8721


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 17/30 | TrF1=0.7275 VaF1=0.8854


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 18/30 | TrF1=0.6543 VaF1=0.8696


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 19/30 | TrF1=0.7394 VaF1=0.9013


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 20/30 | TrF1=0.7684 VaF1=0.8941


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 21/30 | TrF1=0.7180 VaF1=0.8964


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 22/30 | TrF1=0.8055 VaF1=0.9184


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 23/30 | TrF1=0.7104 VaF1=0.9162


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 24/30 | TrF1=0.7698 VaF1=0.8950


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 25/30 | TrF1=0.7261 VaF1=0.9014


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 26/30 | TrF1=0.7819 VaF1=0.8970


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 27/30 | TrF1=0.7411 VaF1=0.9024


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 28/30 | TrF1=0.6913 VaF1=0.9035


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 29/30 | TrF1=0.7958 VaF1=0.8981


                                                                                                                                                               

[vit_base_patch16_384][Fold 0] Ep 30/30 | TrF1=0.7471 VaF1=0.9040

----- vit_base_patch16_384 | Fold 2/5 -----
[BS] Train=16, Valid=16


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 1/30 | TrF1=0.1299 VaF1=0.3519


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 2/30 | TrF1=0.3733 VaF1=0.6060


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 3/30 | TrF1=0.4983 VaF1=0.8004


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 4/30 | TrF1=0.6390 VaF1=0.7599


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 5/30 | TrF1=0.6346 VaF1=0.8318


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 6/30 | TrF1=0.6813 VaF1=0.8414


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 7/30 | TrF1=0.6903 VaF1=0.8906


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 8/30 | TrF1=0.6938 VaF1=0.8766


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 9/30 | TrF1=0.6832 VaF1=0.8447


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 10/30 | TrF1=0.7234 VaF1=0.8911


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 11/30 | TrF1=0.6703 VaF1=0.8553


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 12/30 | TrF1=0.6817 VaF1=0.8730


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 13/30 | TrF1=0.7645 VaF1=0.8914


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 14/30 | TrF1=0.8091 VaF1=0.9115


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 15/30 | TrF1=0.7879 VaF1=0.8788


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 16/30 | TrF1=0.7604 VaF1=0.8573


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 17/30 | TrF1=0.6879 VaF1=0.9006


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 18/30 | TrF1=0.7821 VaF1=0.8986


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 19/30 | TrF1=0.6938 VaF1=0.8996


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 20/30 | TrF1=0.8003 VaF1=0.9147


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 21/30 | TrF1=0.7133 VaF1=0.9135


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 22/30 | TrF1=0.8007 VaF1=0.9083


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 23/30 | TrF1=0.7903 VaF1=0.9038


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 24/30 | TrF1=0.7113 VaF1=0.9052


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 25/30 | TrF1=0.7717 VaF1=0.9075


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 26/30 | TrF1=0.7423 VaF1=0.9058


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 27/30 | TrF1=0.8151 VaF1=0.9053


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 28/30 | TrF1=0.7755 VaF1=0.9100


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 29/30 | TrF1=0.7167 VaF1=0.9165


                                                                                                                                                               

[vit_base_patch16_384][Fold 1] Ep 30/30 | TrF1=0.7568 VaF1=0.9188

----- vit_base_patch16_384 | Fold 3/5 -----
[BS] Train=16, Valid=16


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 1/30 | TrF1=0.2049 VaF1=0.5284


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 2/30 | TrF1=0.4839 VaF1=0.7158


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 3/30 | TrF1=0.5404 VaF1=0.7373


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 4/30 | TrF1=0.5707 VaF1=0.8383


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 5/30 | TrF1=0.6608 VaF1=0.8181


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 6/30 | TrF1=0.7057 VaF1=0.8545


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 7/30 | TrF1=0.6818 VaF1=0.8831


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 8/30 | TrF1=0.6998 VaF1=0.8665


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 9/30 | TrF1=0.6787 VaF1=0.8690


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 10/30 | TrF1=0.7292 VaF1=0.9079


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 11/30 | TrF1=0.7858 VaF1=0.8836


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 12/30 | TrF1=0.7358 VaF1=0.8813


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 13/30 | TrF1=0.6586 VaF1=0.8911


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 14/30 | TrF1=0.8027 VaF1=0.8912


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 15/30 | TrF1=0.7874 VaF1=0.8623


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 16/30 | TrF1=0.7286 VaF1=0.8845


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 17/30 | TrF1=0.7252 VaF1=0.8790


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 18/30 | TrF1=0.7542 VaF1=0.8957


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 19/30 | TrF1=0.7495 VaF1=0.8893


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 20/30 | TrF1=0.7179 VaF1=0.8972


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 21/30 | TrF1=0.8360 VaF1=0.8996


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 22/30 | TrF1=0.7813 VaF1=0.8934


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 23/30 | TrF1=0.8244 VaF1=0.9011


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 24/30 | TrF1=0.7666 VaF1=0.8931


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 25/30 | TrF1=0.7357 VaF1=0.9000


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 26/30 | TrF1=0.7781 VaF1=0.8957


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 27/30 | TrF1=0.7179 VaF1=0.9003


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 28/30 | TrF1=0.7353 VaF1=0.8977


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 29/30 | TrF1=0.7855 VaF1=0.8929


                                                                                                                                                               

[vit_base_patch16_384][Fold 2] Ep 30/30 | TrF1=0.7570 VaF1=0.8957

----- vit_base_patch16_384 | Fold 4/5 -----
[BS] Train=16, Valid=16


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 1/30 | TrF1=0.1044 VaF1=0.1710


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 2/30 | TrF1=0.1902 VaF1=0.3416


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 3/30 | TrF1=0.4304 VaF1=0.6095


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 4/30 | TrF1=0.5160 VaF1=0.7556


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 5/30 | TrF1=0.5389 VaF1=0.7172


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 6/30 | TrF1=0.6051 VaF1=0.7983


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 7/30 | TrF1=0.7152 VaF1=0.8450


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 8/30 | TrF1=0.6693 VaF1=0.8411


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 9/30 | TrF1=0.7059 VaF1=0.8399


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 10/30 | TrF1=0.6788 VaF1=0.8608


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 11/30 | TrF1=0.7412 VaF1=0.8575


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 12/30 | TrF1=0.7495 VaF1=0.8473


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 13/30 | TrF1=0.7574 VaF1=0.8560


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 14/30 | TrF1=0.6644 VaF1=0.8689


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 15/30 | TrF1=0.6598 VaF1=0.8748


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 16/30 | TrF1=0.7827 VaF1=0.8771


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 17/30 | TrF1=0.7361 VaF1=0.8628


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 18/30 | TrF1=0.7872 VaF1=0.8494


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 19/30 | TrF1=0.7749 VaF1=0.8805


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 20/30 | TrF1=0.7158 VaF1=0.8688


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 21/30 | TrF1=0.7472 VaF1=0.8857


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 22/30 | TrF1=0.8220 VaF1=0.8700


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 23/30 | TrF1=0.6653 VaF1=0.8979


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 24/30 | TrF1=0.7937 VaF1=0.8828


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 25/30 | TrF1=0.7152 VaF1=0.8780


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 26/30 | TrF1=0.7995 VaF1=0.8842


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 27/30 | TrF1=0.7652 VaF1=0.8828


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 28/30 | TrF1=0.7665 VaF1=0.8887


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 29/30 | TrF1=0.7529 VaF1=0.8822


                                                                                                                                                               

[vit_base_patch16_384][Fold 3] Ep 30/30 | TrF1=0.7131 VaF1=0.8811

----- vit_base_patch16_384 | Fold 5/5 -----
[BS] Train=16, Valid=16


  scaler = torch.cuda.amp.GradScaler(enabled=(device.type=="cuda"))
                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 1/30 | TrF1=0.1042 VaF1=0.1192


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 2/30 | TrF1=0.2852 VaF1=0.4872


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 3/30 | TrF1=0.4174 VaF1=0.7356


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 4/30 | TrF1=0.5618 VaF1=0.7649


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 5/30 | TrF1=0.6077 VaF1=0.7688


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 6/30 | TrF1=0.6179 VaF1=0.7997


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 7/30 | TrF1=0.6130 VaF1=0.8268


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 8/30 | TrF1=0.6940 VaF1=0.8645


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 9/30 | TrF1=0.6669 VaF1=0.8623


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 10/30 | TrF1=0.7255 VaF1=0.8540


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 11/30 | TrF1=0.7713 VaF1=0.8693


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 12/30 | TrF1=0.8088 VaF1=0.8594


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 13/30 | TrF1=0.7357 VaF1=0.8634


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 14/30 | TrF1=0.6780 VaF1=0.8667


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 15/30 | TrF1=0.7864 VaF1=0.8522


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 16/30 | TrF1=0.7083 VaF1=0.8680


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 17/30 | TrF1=0.7437 VaF1=0.8679


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 18/30 | TrF1=0.7077 VaF1=0.8677


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 19/30 | TrF1=0.8158 VaF1=0.8641


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 20/30 | TrF1=0.7651 VaF1=0.8710


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 21/30 | TrF1=0.8041 VaF1=0.8813


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 22/30 | TrF1=0.7536 VaF1=0.8759


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 23/30 | TrF1=0.7489 VaF1=0.8723


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 24/30 | TrF1=0.7052 VaF1=0.8714


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 25/30 | TrF1=0.8053 VaF1=0.8709


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 26/30 | TrF1=0.7789 VaF1=0.8758


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 27/30 | TrF1=0.7701 VaF1=0.8666


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 28/30 | TrF1=0.7995 VaF1=0.8654


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 29/30 | TrF1=0.7411 VaF1=0.8735


                                                                                                                                                               

[vit_base_patch16_384][Fold 4] Ep 30/30 | TrF1=0.7181 VaF1=0.8728
>>> vit_base_patch16_384 fold F1: [0.9184342846022361, 0.9188169951047627, 0.9079333387733483, 0.8978653036463823, 0.8812525182394281] | mean=0.9049




In [8]:
# ----------------------------
# OCR OOF load (optional)
# ----------------------------
def try_load_ocr_oof_for_fold(fold, basenames):
    path = os.path.join(OCR_VALID_PROBS_DIR, f"fold{fold}_ocr_valid.csv")
    if not os.path.exists(path): return None
    df_ocr = pd.read_csv(path)
    prob_cols = [c for c in df_ocr.columns if c.startswith("prob_")]
    df_ocr = df_ocr.set_index("basename").reindex(basenames).fillna(1e-9)
    probs = df_ocr[prob_cols].values.astype(np.float64)
    probs = np.clip(probs, 1e-9, None); probs = probs / probs.sum(axis=1, keepdims=True)
    return probs

have_ocr_oof = os.path.isdir(OCR_VALID_PROBS_DIR)

# ----------------------------
# Auto-weight grid search (coarse→fine)
# ----------------------------
def eval_weights(eff_list, vit_list, ocr_list, tgt_list, we, wv, wo):
    y_pred, y_true = [], []
    for i, (eff, vit, tgt) in enumerate(zip(eff_list, vit_list, tgt_list)):
        mix = we*eff + wv*vit + (wo*(ocr_list[i]) if ocr_list is not None else 0.0)
        y_pred.append(mix.argmax(1))
        y_true.append(tgt)
    y_pred = np.concatenate(y_pred); y_true = np.concatenate(y_true)
    return f1_score(y_true, y_pred, average="macro")

def grid_search(eff_list, vit_list, ocr_list, tgt_list, step):
    best = (-1.0, 0.5, 0.5, 0.0)
    weights = np.arange(0.0, 1.0+1e-9, step)
    for we in weights:
        for wv in weights:
            rem = 1.0 - we - wv
            if ocr_list is not None:
                if rem < 0: continue
                wo_candidates = np.arange(0.0, rem+1e-9, step)
            else:
                if we + wv <= 0: continue
                wo_candidates = [0.0]
            for wo in wo_candidates:
                s = we+wv+wo; we1, wv1, wo1 = we/s, wv/s, wo/s
                score = eval_weights(eff_list, vit_list, ocr_list, tgt_list, we1, wv1, wo1)
                if score > best[0]: best = (score, we1, wv1, wo1)
    return best

eff_oof, vit_oof, tgt_oof, ocr_oof = [], [], [], [] if have_ocr_oof else None
for fold in range(NFOLDS):
    eff_oof.append(np.load(f"./oofs/tf_efficientnet_b3_ns_fold{fold}_probs.npy"))
    vit_oof.append(np.load(f"./oofs/vit_base_patch16_384_fold{fold}_probs.npy"))
    tgt_oof.append(np.load(f"./oofs/tf_efficientnet_b3_ns_fold{fold}_targets.npy"))
    basenames = np.load(f"./oofs/tf_efficientnet_b3_ns_fold{fold}_basenames.npy", allow_pickle=True).tolist()
    if have_ocr_oof and ocr_oof is not None:
        o = try_load_ocr_oof_for_fold(fold, basenames)
        if o is None: ocr_oof = None
        elif ocr_oof is not None: ocr_oof.append(o)

best = grid_search(eff_oof, vit_oof, ocr_oof, tgt_oof, step=0.1)
# fine search around best
fine = np.linspace(max(0,best[1]-0.1), min(1,best[1]+0.1), 11)
best_f = (-1.0,)*4
for we in fine:
    for wv in np.linspace(max(0,best[2]-0.1), min(1,best[2]+0.1), 11):
        rem = 1.0 - we - wv
        wo_list = [0.0] if ocr_oof is None else np.linspace(max(0,rem-0.1), max(0,rem), 6)
        for wo in wo_list:
            s = we+wv+wo; we1, wv1, wo1 = we/s, wv/s, wo/s
            sc = eval_weights(eff_oof, vit_oof, ocr_oof, tgt_oof, we1, wv1, wo1)
            if sc > best_f[0]: best_f = (sc, we1, wv1, wo1)

best_score, w_eff, w_vit, w_ocr = best_f
print(f"\n[Auto-Weight OOF] best macro F1={best_score:.4f} | w_eff={w_eff:.2f}, w_vit={w_vit:.2f}, w_ocr={(w_ocr if ocr_oof is not None else 0.0):.2f}")
with open("./oofs/best_weights.json", "w") as f:
    json.dump({"score": float(best_score), "w_eff": float(w_eff), "w_vit": float(w_vit), "w_ocr": float(w_ocr if ocr_oof is not None else 0.0)}, f, indent=2)

# ----------------------------
# Dataset-batched 8-way TTA inference (메모리 친화)
# ----------------------------
sub = pd.read_csv(SUB_CSV)
test_files = []
for name in sub["ID"]:
    if not str(name).endswith(".jpg"): name = f"{name}.jpg"
    cands = glob.glob(os.path.join(TEST_IMG_DIR, "**", name), recursive=True)
    test_files.append(cands[0] if cands else os.path.join(TEST_IMG_DIR, name))

angles = [0, 90, 180, 270]; flips = [False, True]
def rotate_np(img, angle):
    if angle==0: return img
    if angle==90: return cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    if angle==180: return cv2.rotate(img, cv2.ROTATE_180)
    if angle==270: return cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)

backbone_test_probs = {}

for bb in BACKBONES:
    model_name = bb["name"]; img_size = bb["img_size"]
    _, tst_tf = build_transforms(img_size)
    model = create_model(model_name, bb["drop_rate"], bb["drop_path"], bb["grad_ckpt"])
    # fold 평균 가중치
    state_paths = [f"./models/{model_name}_fold{f}.pt" for f in range(NFOLDS)]
    states = [torch.load(p, map_location="cpu") for p in state_paths]
    # 평균 가중치 로드(메모리 절감 + 속도)
    avg_state = {}
    for k in states[0].keys():
        avg_state[k] = sum(s[k] for s in states) / len(states)
    model.load_state_dict(avg_state, strict=True)
    model.eval()

    # 8 TTA 변형을 "데이터셋 전체"에 적용하여 8번 forward
    def make_loader(paths, tfm, bs):
        ds = TestDataset(paths, tfm)
        return DataLoader(ds, batch_size=bs, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=True)

    # 기본 변형용 텐서 생성 함수를 재사용하기 위해, PIL→np→albumentations는 그대로 두고
    # 회전/플립은 albumentations 대신 OpenCV 전처리를 별도 파일 캐시에 반영하지 않고
    # 매 변형마다 on-the-fly로 이미지 읽어서 적용하는 간단 루틴
    def transform_filelist(paths, ang, fl):
        imgs = []
        for p in paths:
            img0 = np.array(Image.open(p).convert("RGB"))
            base = rotate_np(img0, ang)
            img = cv2.flip(base, 1) if fl else base
            imgs.append(img)
        return imgs

    # 배치 크기 자동 튜닝
    bs_try = INIT_BATCH_SIZE
    test_probs_accum = None
    for ang in angles:
        for fl in flips:
            # 변형 이미지 리스트 만들기
            imgs = transform_filelist(test_files, ang, fl)
            class DummyDS(Dataset):
                def __init__(self, arr, tf): self.arr, self.tf = arr, tf
                def __len__(self): return len(self.arr)
                def __getitem__(self, i): return self.tf(image=self.arr[i])["image"]
            ds = DummyDS(imgs, tst_tf)

            def _mk(bs):
                return DataLoader(ds, batch_size=bs, shuffle=False,
                                  num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=False)

            loader = None
            for scale in (1, 0.75, 0.5, 0.25):
                try:
                    bs = max(1, int(bs_try*scale))
                    loader = _mk(bs)
                    # 한 미니배치 테스트로 OOM 미리 검증
                    it = iter(loader); xb = next(it)
                    xb = xb.to(device, non_blocking=True)
                    with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16, enabled=(device.type=="cuda")):
                        _ = model(xb)
                    break
                except Exception as e:
                    if "out of memory" in str(e).lower():
                        torch.cuda.empty_cache(); continue
                    else:
                        raise e
            if loader is None:
                raise RuntimeError("TTA loader OOM")

            probs_run = []
            for xb in tqdm(loader, desc=f"TTA {model_name} ang{ang} fl{fl}"):
                xb = xb.to(device, non_blocking=True)
                with torch.no_grad(), torch.autocast(device_type="cuda", dtype=torch.float16, enabled=(device.type=="cuda")):
                    logits = model(xb)
                    probs = torch.softmax(logits, dim=1).to(torch.float32).cpu().numpy()
                probs_run.append(probs)
            probs_run = np.vstack(probs_run)

            if test_probs_accum is None:
                test_probs_accum = probs_run
            else:
                test_probs_accum += probs_run
            torch.cuda.empty_cache()

    test_prob = (test_probs_accum / 8.0).astype(np.float32)
    if USE_TEMPERATURE:
        test_prob = apply_temperature_np(test_prob, T=TEMP_T)
    backbone_test_probs[model_name] = test_prob
    np.save(f"./test_probs/{model_name}_test_probs.npy", test_prob.astype(np.float16))
    del model; torch.cuda.empty_cache()

# ----------------------------
# Optional OCR test probs load
# ----------------------------
ocr_test_probs = None
if os.path.exists(OCR_TEST_PROBS_CSV):
    ocr_df = pd.read_csv(OCR_TEST_PROBS_CSV)
    prob_cols = [c for c in ocr_df.columns if c.startswith("prob_")]
    tmp = ocr_df.set_index("ID").reindex(pd.Series(sub["ID"]).astype(str)).fillna(1e-9)
    tmp = tmp[prob_cols].values
    tmp = np.clip(tmp, 1e-9, None); tmp = tmp / tmp.sum(axis=1, keepdims=True)
    ocr_test_probs = tmp.astype(np.float32)
    print("Loaded OCR test probs:", ocr_test_probs.shape)
else:
    print("No OCR test probs. Skip OCR in final ensemble.")

# ----------------------------
# Final weighted ensemble & save
# ----------------------------
eff_probs = backbone_test_probs["tf_efficientnet_b3_ns"]
vit_probs = backbone_test_probs["vit_base_patch16_384"]

w_eff, w_vit, w_ocr = float(w_eff), float(w_vit), float(w_ocr if ocr_test_probs is not None else 0.0)
ws = w_eff + w_vit + w_ocr; w_eff, w_vit, w_ocr = w_eff/ws, w_vit/ws, w_ocr/ws

final_probs = w_eff*eff_probs + w_vit*vit_probs + (w_ocr*ocr_test_probs if ocr_test_probs is not None else 0.0)
final_preds = np.argmax(final_probs, axis=1)

sub = pd.read_csv(SUB_CSV)
sub["target"] = final_preds
mean_eff = np.mean(all_backbone_fold_scores["tf_efficientnet_b3_ns"])
mean_vit = np.mean(all_backbone_fold_scores["vit_base_patch16_384"])
out_name = f"sub_v7mem_eff{mean_eff:.4f}_vit{mean_vit:.4f}_w{w_eff:.2f}-{w_vit:.2f}-{w_ocr:.2f}.csv"
sub.to_csv(out_name, index=False)
print(f"Saved submission: {out_name}")


  s = we+wv+wo; we1, wv1, wo1 = we/s, wv/s, wo/s



[Auto-Weight OOF] best macro F1=0.9776 | w_eff=0.41, w_vit=0.16, w_ocr=0.43


  A.GaussNoise(var_limit=(5, 20), p=0.3),
  model = create_fn(
  states = [torch.load(p, map_location="cpu") for p in state_paths]
TTA tf_efficientnet_b3_ns ang0 flFalse: 100%|████████████████████████████████████████████████████████████████████████████████| 197/197 [00:04<00:00, 41.09it/s]
TTA tf_efficientnet_b3_ns ang0 flTrue: 100%|█████████████████████████████████████████████████████████████████████████████████| 197/197 [00:05<00:00, 39.23it/s]
TTA tf_efficientnet_b3_ns ang90 flFalse: 100%|███████████████████████████████████████████████████████████████████████████████| 197/197 [00:05<00:00, 37.54it/s]
TTA tf_efficientnet_b3_ns ang90 flTrue: 100%|████████████████████████████████████████████████████████████████████████████████| 197/197 [00:05<00:00, 37.69it/s]
TTA tf_efficientnet_b3_ns ang180 flFalse: 100%|██████████████████████████████████████████████████████████████████████████████| 197/197 [00:05<00:00, 38.92it/s]
TTA tf_efficientnet_b3_ns ang180 flTrue: 100%|███████████████████████

Loaded OCR test probs: (3140, 17)
Saved submission: sub_v7mem_eff0.9373_vit0.9049_w0.41-0.16-0.43.csv



