In [1]:
# ============================================================
# v9_holdout_ensemble_b3_vit_convnext.py
# ------------------------------------------------------------
# ✅ Train / Holdout = v9 (root-level stratified 8:2)
# ✅ Models: EfficientNet-B3, ConvNeXt-Small, ViT-B16
# ✅ 4-way TTA (orig, hflip, rotate ±7°)
# ✅ Auto-weight ensemble (no floor)
# ============================================================

import os, gc, math, random, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader

import albumentations as A
from albumentations.pytorch import ToTensorV2

import timm
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score


# ============================================================
# Config
# ============================================================
# Root folder that holds the raw CSV files and the preprocessed image trees.
BASE = "/data/ephemeral/home/data"

# Input locations, all derived from BASE.
META_V9 = f"{BASE}/meta_stage0_9_train_v9.csv"
TRAIN_CSV = f"{BASE}/raw/train.csv"
SUB_CSV = f"{BASE}/raw/sample_submission.csv"
TRAIN_IMG_V9 = f"{BASE}/processed/stage0_9_train_v9"
TEST_IMG_V8 = f"{BASE}/processed/stage0_8_test_v8"

# Output locations: the run folder first, then one sub-folder per artifact kind.
OUT_DIR = "./runs_v9_holdout"
os.makedirs(OUT_DIR, exist_ok=True)
for _sub in ("models", "test_probs", "debug"):
    os.makedirs(f"{OUT_DIR}/{_sub}", exist_ok=True)
del _sub

# Shared hyper-parameters read by the helpers below.
CFG = {
    "seed": 42,
    "num_classes": 17,
    # Per-backbone square input resolution.
    "img_size_b3": 380,
    "img_size_conv": 384,
    "img_size_vit": 384,
    "batch_size": 16,
    "epochs": 50,
    # Number of consecutive non-improving epochs tolerated before stopping.
    "early_stop": 8,
    # Optimizer learning rate / weight decay and loss label smoothing.
    "lr": 1e-4,
    "wd": 1e-4,
    "label_smooth": 0.05,
    "device": "cuda" if torch.cuda.is_available() else "cpu",
}


# ============================================================
# Utils
# ============================================================
def set_seed(seed):
    """Seed Python's `random`, NumPy and PyTorch (CPU and every CUDA device) with `seed`."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

def row_normalize(x):
    """Rescale every row of a 2-D array-like so it sums to 1.

    Values are floored at 1e-12 before normalising, the arithmetic is done in
    float64, and the result is returned as float32.
    """
    floored = np.clip(np.asarray(x, dtype=np.float64), a_min=1e-12, a_max=None)
    row_totals = floored.sum(axis=1, keepdims=True)
    normalized = floored / row_totals
    return normalized.astype(np.float32)

def corrcoef(a, b, eps=1e-12):
    """Return the mean row-wise Pearson correlation between `a` and `b`.

    Each row of the two 2-D inputs is mean-centred and scaled to unit norm;
    the per-row dot products are then averaged into a single float.

    Args:
        a, b: 2-D array-likes of identical shape (rows are compared pairwise).
        eps: Lower bound applied to each row norm. A constant row has zero
            norm after centring; without the bound the 0/0 division yields
            NaN and poisons the whole mean. With it, such a row simply
            contributes a correlation of 0.

    Returns:
        The average per-row correlation as a Python float.
    """
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    a = a - a.mean(axis=1, keepdims=True)
    b = b - b.mean(axis=1, keepdims=True)
    a_norm = np.maximum(np.linalg.norm(a, axis=1, keepdims=True), eps)
    b_norm = np.maximum(np.linalg.norm(b, axis=1, keepdims=True), eps)
    per_row = ((a / a_norm) * (b / b_norm)).sum(1)
    return float(np.mean(per_row))

def temp_scale(probs, T):
    """Re-soften or sharpen probabilities with temperature `T`.

    The probabilities are clipped to [1e-12, 1], converted to log space,
    divided by `T`, and pushed back through a numerically stable softmax
    (row maximum subtracted before exponentiating). The result is
    row-normalised by `row_normalize`.
    """
    log_probs = np.log(np.clip(probs, 1e-12, 1.0))
    scaled = log_probs / T
    row_peak = scaled.max(1, keepdims=True)
    unnormalized = np.exp(scaled - row_peak)
    return row_normalize(unnormalized)

def fit_temperature(probs, targets, t_min=0.8, t_max=2.0, steps=25):
    """Grid-search the temperature that minimises the mean NLL of `targets`.

    `steps` evenly spaced temperatures in [t_min, t_max] are tried; each one
    is applied with `temp_scale` and scored by the average negative
    log-likelihood of the true class. The first temperature reaching the
    lowest NLL is returned; 1.0 is returned if no candidate scores below 1e9.
    """
    one_hot = np.eye(CFG["num_classes"])[targets]
    n_samples = len(targets)

    chosen_T, lowest_nll = 1.0, 1e9
    for candidate in np.linspace(t_min, t_max, steps):
        scaled = temp_scale(probs, candidate)
        log_lik = one_hot * np.log(np.clip(scaled, 1e-12, 1.0))
        nll = -np.sum(log_lik) / n_samples
        if nll < lowest_nll:
            chosen_T, lowest_nll = candidate, nll
    return chosen_T

def save_npz(path, arr):
    """Write `arr` to `path` as a compressed .npz archive holding one float32 array."""
    payload = np.asarray(arr, dtype=np.float32)
    np.savez_compressed(path, payload)


# ============================================================
# Data prep (root-level split)
# ============================================================
# Seed all RNGs before anything random happens in this cell.
set_seed(CFG["seed"])

# `meta` lists the preprocessed training images (columns used below:
# "basename", "group"); `train_raw` carries the labels ("ID", "target").
meta = pd.read_csv(META_V9)
train_raw = pd.read_csv(TRAIN_CSV)

# Normalise label IDs so they always end in ".jpg", then build ID -> target.
train_raw["ID_norm"] = train_raw["ID"].apply(lambda x: x if x.endswith(".jpg") else f"{x}.jpg")
id2tgt = dict(zip(train_raw["ID_norm"], train_raw["target"]))

# Recover the root image ID of every row: drop the extension and everything
# from the first "_aug" onward, then re-append ".jpg".
# NOTE(review): presumably "_aug" marks augmented copies of a root image — confirm.
meta["root_id"] = meta["basename"].apply(lambda x: os.path.splitext(x)[0].split("_aug")[0] + ".jpg")
meta["target"] = meta["root_id"].map(id2tgt)
# Rows whose root ID has no label are discarded.
meta = meta.dropna(subset=["target"]).reset_index(drop=True)
meta["target"] = meta["target"].astype(int)

# Images live under TRAIN_IMG_V9/<group>/<basename>.
meta["filepath"] = meta.apply(
    lambda r: os.path.join(TRAIN_IMG_V9, str(r["group"]), str(r["basename"])), axis=1
)

# Split on unique (root_id, target) pairs taken from rows without "_aug" in
# the basename, stratified by target, 80/20. Splitting on roots keeps every
# row derived from one root on the same side of the split.
orig = meta[~meta["basename"].str.contains("_aug")][["root_id", "target"]].drop_duplicates()
train_roots, hold_roots = train_test_split(
    orig["root_id"], test_size=0.2, random_state=CFG["seed"], stratify=orig["target"]
)

# Both frames keep every row of `meta` whose root falls in the respective set,
# so the holdout also contains the "_aug" rows of its roots.
train_df = meta[meta["root_id"].isin(train_roots)].reset_index(drop=True)
hold_df = meta[meta["root_id"].isin(hold_roots)].reset_index(drop=True)

print(f"‚úÖ Total: {len(meta)} | Train: {len(train_df)} | Holdout: {len(hold_df)}")




‚úÖ Total: 14130 | Train: 11304 | Holdout: 2826


In [2]:
# ============================================================
# Transforms
# ============================================================
def build_transforms(img_size, is_train=True):
    """Build the albumentations pipeline for one backbone.

    Both modes resize to `img_size` x `img_size`, normalise with the
    mean/std constants below, and convert to a tensor. Training mode inserts
    light augmentation (small shift/scale/rotate, horizontal flip,
    brightness/contrast jitter, blur, JPEG compression) between the resize
    and the normalisation.
    """
    resize = A.Resize(img_size, img_size)
    finalize = [
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]

    if not is_train:
        return A.Compose([resize, *finalize])

    augmentations = [
        A.ShiftScaleRotate(shift_limit=0.03, scale_limit=0.05,
                           rotate_limit=7, border_mode=cv2.BORDER_REFLECT_101, p=0.5),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(0.1, 0.1, p=0.3),
        A.GaussianBlur(blur_limit=(3, 5), p=0.2),
        A.ImageCompression(quality_lower=90, quality_upper=100, p=0.2),
    ]
    return A.Compose([resize, *augmentations, *finalize])


# ============================================================
# Dataset
# ============================================================
class ImgDataset(Dataset):
    """Dataset yielding `(image_tensor, target)` pairs from a dataframe.

    The dataframe must provide a "filepath" column (image location on disk)
    and a "target" column (class label convertible to int).
    """

    def __init__(self, df, img_size, is_train):
        # Re-index so positional lookups in __getitem__ match 0..len-1.
        self.df = df.reset_index(drop=True)
        self.tf = build_transforms(img_size, is_train)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at position `idx`.

        Raises:
            FileNotFoundError: if the image cannot be read from disk.
        """
        row = self.df.iloc[idx]
        img = cv2.imread(row["filepath"])
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {row['filepath']}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        aug = self.tf(image=img)["image"]
        return aug, int(row["target"])


# ============================================================
# Model Factory
# ============================================================
def create_model(name, num_classes, drop_rate=0.0, drop_path=0.0):
    """Create a pretrained timm model with a `num_classes`-way head.

    The model is moved to CFG["device"], switched to channels-last memory
    format, and gradient checkpointing is turned on when the model exposes
    `set_grad_checkpointing`.
    """
    timm_kwargs = {
        "pretrained": True,
        "num_classes": num_classes,
        "drop_rate": drop_rate,
        "drop_path_rate": drop_path,
    }
    net = timm.create_model(name, **timm_kwargs)

    net.to(CFG["device"])
    net.to(memory_format=torch.channels_last)

    if hasattr(net, "set_grad_checkpointing"):
        net.set_grad_checkpointing(True)
    return net


# ============================================================
# Training Loop
# ============================================================
def train_one_epoch(model, loader, optimizer, loss_fn, scaler):
    """Run one mixed-precision training pass over `loader`.

    Returns the loss averaged over the number of batches.
    """
    model.train()
    device = CFG["device"]
    running_loss = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad(set_to_none=True)
        with torch.autocast("cuda", torch.float16):
            logits = model(images)
            loss = loss_fn(logits, labels)

        # Scaled backward pass, optimizer step, then scale-factor update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

    return running_loss / len(loader)


@torch.no_grad()
def validate(model, loader, loss_fn):
    """Evaluate `model` on `loader` without gradients.

    Returns a 4-tuple: loss averaged over batches, macro F1 of the argmax
    predictions, the row-normalised probability matrix, and the targets.
    """
    model.eval()
    device = CFG["device"]
    loss_sum = 0
    prob_chunks, target_chunks = [], []

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)
        with torch.autocast("cuda", torch.float16):
            logits = model(images)
            loss = loss_fn(logits, labels)
            batch_probs = torch.softmax(logits, dim=1).cpu().numpy()
        loss_sum += loss.item()
        prob_chunks.append(batch_probs)
        target_chunks.append(labels.cpu().numpy())

    probs = row_normalize(np.vstack(prob_chunks))
    tgts = np.concatenate(target_chunks)
    macro_f1 = f1_score(tgts, probs.argmax(1), average="macro")
    return loss_sum / len(loader), macro_f1, probs, tgts


# ============================================================
# Backbone trainer
# ============================================================
def train_backbone(key, train_df, hold_df):
    """Train the backbone selected by `key` ("b3", "conv" or "vit").

    After every epoch the model is scored on `hold_df`; whenever the macro F1
    improves, the weights are saved to `{OUT_DIR}/models/{key}_best.pt` and
    the holdout probabilities are remembered. Training stops once
    CFG["early_stop"] consecutive epochs fail to improve.

    Returns the holdout probabilities and targets of the best epoch.
    """
    # key -> (timm model name, input size, drop rate, drop-path rate)
    backbones = {
        "b3":  ("tf_efficientnet_b3_ns", CFG["img_size_b3"], 0.4, 0.2),
        "conv":("convnext_small", CFG["img_size_conv"], 0.0, 0.2),
        "vit": ("vit_base_patch16_384", CFG["img_size_vit"], 0.2, 0.1)
    }
    name, img_size, dr, dp = backbones[key]

    model = create_model(name, CFG["num_classes"], dr, dp)

    loader_kwargs = {"batch_size": CFG["batch_size"], "num_workers": 4}
    tr_ld = DataLoader(ImgDataset(train_df, img_size, True), shuffle=True, **loader_kwargs)
    ho_ld = DataLoader(ImgDataset(hold_df, img_size, False), shuffle=False, **loader_kwargs)

    opt = AdamW(model.parameters(), lr=CFG["lr"], weight_decay=CFG["wd"])
    loss_fn = nn.CrossEntropyLoss(label_smoothing=CFG["label_smooth"])
    scaler = torch.cuda.amp.GradScaler()

    top_f1, stale_epochs = -1, 0
    for ep in range(1, CFG["epochs"] + 1):
        tr_loss = train_one_epoch(model, tr_ld, opt, loss_fn, scaler)
        va_loss, va_f1, probs, tgts = validate(model, ho_ld, loss_fn)
        print(f"[{key}] Ep{ep:02d}/{CFG['epochs']} | Tr={tr_loss:.4f} Va={va_loss:.4f} F1={va_f1:.4f}")

        if va_f1 > top_f1:
            # New best epoch: reset patience, checkpoint, keep its outputs.
            top_f1, stale_epochs = va_f1, 0
            torch.save(model.state_dict(), f"{OUT_DIR}/models/{key}_best.pt")
            best_probs, best_tgts = probs, tgts
        else:
            stale_epochs += 1

        if stale_epochs >= CFG["early_stop"]:
            break

    print(f"‚úÖ {key} best F1={top_f1:.4f}")

    # Release GPU memory before the next backbone is built.
    del model
    torch.cuda.empty_cache()
    gc.collect()
    return best_probs, best_tgts




In [3]:
# ============================================================
# TTA inference (4-way)
# ============================================================
def rotate(img, angle):
    """Rotate `img` by `angle` degrees about its centre, keeping its size.

    Pixels exposed by the rotation are filled by reflecting the border.
    An angle of 0 returns the input object untouched.
    """
    if angle != 0:
        height, width = img.shape[0], img.shape[1]
        matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
        return cv2.warpAffine(img, matrix, (width, height), borderMode=cv2.BORDER_REFLECT_101)
    return img

@torch.no_grad()
def infer_tta(key, model_path, paths):
    """Predict class probabilities for `paths` with 4-way test-time augmentation.

    The four views are: original, horizontal flip, rotation by +7 degrees and
    rotation by -7 degrees. Each view's softmax output is row-normalised, the
    four are averaged, and the average is row-normalised again.

    Images are processed one batch at a time and each file is decoded once
    and reused for all four views. This keeps memory bounded by the batch
    size instead of materialising the whole test set as a single tensor for
    every view.

    Args:
        key: Backbone identifier: "b3", "conv" or "vit".
        model_path: Path to the state dict saved for that backbone.
        paths: Iterable of image file paths.

    Returns:
        float32 array of shape (len(paths), CFG["num_classes"]); an empty
        array with that column count when `paths` is empty.

    Raises:
        FileNotFoundError: if an image cannot be read from disk.
    """
    name, img_size = {
        "b3":("tf_efficientnet_b3_ns", CFG["img_size_b3"]),
        "conv":("convnext_small", CFG["img_size_conv"]),
        "vit":("vit_base_patch16_384", CFG["img_size_vit"])
    }[key]
    model = create_model(name, CFG["num_classes"])
    model.load_state_dict(torch.load(model_path, map_location="cpu"), strict=True)
    model.eval()

    tf = build_transforms(img_size, False)
    # (rotation angle in degrees, apply horizontal flip)
    ttas = [(0, False), (0, True), (7, False), (-7, False)]

    paths = list(paths)
    batch_size = CFG["batch_size"]
    averaged = []

    for start in range(0, len(paths), batch_size):
        # Decode each image of this batch exactly once.
        images = []
        for p in paths[start:start + batch_size]:
            raw = cv2.imread(p)
            # cv2.imread returns None rather than raising on failure.
            if raw is None:
                raise FileNotFoundError(f"Could not read image: {p}")
            images.append(cv2.cvtColor(raw, cv2.COLOR_BGR2RGB))

        acc = None
        for ang, flip in ttas:
            views = []
            for img in images:
                view = rotate(img, ang)
                if flip:
                    view = cv2.flip(view, 1)
                views.append(tf(image=view)["image"])
            xb = torch.stack(views).to(CFG["device"])
            with torch.autocast("cuda", torch.float16):
                pr = torch.softmax(model(xb), dim=1).cpu().numpy()
            pr = row_normalize(pr)
            acc = pr if acc is None else acc + pr
        averaged.append(acc / len(ttas))

    del model; torch.cuda.empty_cache(); gc.collect()

    if not averaged:
        return np.zeros((0, CFG["num_classes"]), dtype=np.float32)
    return row_normalize(np.vstack(averaged))



In [4]:

# ============================================================
# Ensemble Weight Search
# ============================================================
def search_weights(p1, p2, p3, y, step=0.05):
    """Grid-search convex blend weights for three probability matrices.

    Every (w1, w2, w3) with w1 and w2 multiples of `step`, w1 + w2 <= 1 and
    w3 = 1 - w1 - w2 is scored by the macro F1 of the argmax of the blended
    probabilities against `y`. The first combination reaching the highest
    score wins.

    The grid is enumerated with integer counters instead of float
    `np.arange` ranges, so no weight can overshoot 1 for small steps, and the
    remainder weight is clamped at 0 to absorb floating-point round-off.

    Args:
        p1, p2, p3: Probability arrays of identical shape (samples x classes).
        y: True labels, one per sample.
        step: Grid spacing; must be positive.

    Returns:
        ((w1, w2, w3), best_f1)

    Raises:
        ValueError: if `step` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    # Largest multiple of `step` not exceeding 1 (tolerating round-off).
    n_steps = int(1.0 / step + 1e-9)

    best_f1, best_w = -1, (0.33, 0.33, 0.34)
    for i in range(n_steps + 1):
        w1 = i * step
        for j in range(n_steps - i + 1):
            w2 = j * step
            w3 = max(0.0, 1.0 - w1 - w2)
            mix = row_normalize(w1*p1 + w2*p2 + w3*p3)
            f1 = f1_score(y, mix.argmax(1), average="macro")
            if f1 > best_f1:
                best_f1, best_w = f1, (w1, w2, w3)
    return best_w, best_f1



In [5]:

# ============================================================
# Main
# ============================================================
if __name__ == "__main__":
    # 1) Train each backbone on the same split; every call returns the holdout
    #    probabilities of its best epoch. The holdout loader is not shuffled,
    #    so the targets from the first call are reused for the other two.
    print("üöÄ Training 3 backbones with root-level v9 split")
    b3_p, ho_y = train_backbone("b3", train_df, hold_df)
    conv_p, _ = train_backbone("conv", train_df, hold_df)
    vit_p, _ = train_backbone("vit", train_df, hold_df)

    # 2) Fit one temperature per model on the holdout and apply it.
    print("\nüìè Temperature scaling")
    T_b3, T_conv, T_vit = fit_temperature(b3_p, ho_y), fit_temperature(conv_p, ho_y), fit_temperature(vit_p, ho_y)
    b3_p, conv_p, vit_p = temp_scale(b3_p, T_b3), temp_scale(conv_p, T_conv), temp_scale(vit_p, T_vit)

    # 3) Search blend weights on the temperature-scaled holdout probabilities.
    # NOTE(review): the same holdout drives early stopping, temperature
    # fitting and weight search, so the reported F1 is optimistic.
    print("\n‚öñÔ∏è Searching best ensemble weights...")
    (w_b3, w_conv, w_vit), best_f1 = search_weights(b3_p, conv_p, vit_p, ho_y)
    print(f"Best holdout F1={best_f1:.4f} | weights: b3={w_b3:.2f}, conv={w_conv:.2f}, vit={w_vit:.2f}")

    # 4) Resolve the test image paths from the sample submission.
    sub = pd.read_csv(SUB_CSV)
    sub["ID"] = sub["ID"].apply(lambda x: x if x.endswith(".jpg") else f"{x}.jpg")
    test_paths = [os.path.join(TEST_IMG_V8, f) for f in sub["ID"]]

    # 5) TTA inference with the best checkpoint of each backbone.
    print("\nüîÆ Inference with 4-way TTA")
    b3_test = infer_tta("b3", f"{OUT_DIR}/models/b3_best.pt", test_paths)
    conv_test = infer_tta("conv", f"{OUT_DIR}/models/conv_best.pt", test_paths)
    vit_test = infer_tta("vit", f"{OUT_DIR}/models/vit_best.pt", test_paths)

    # The raw (unscaled) per-model probabilities are what gets archived.
    save_npz(f"{OUT_DIR}/test_probs/b3_test_probs.npz", b3_test)
    save_npz(f"{OUT_DIR}/test_probs/conv_test_probs.npz", conv_test)
    save_npz(f"{OUT_DIR}/test_probs/vit_test_probs.npz", vit_test)

    # 6) Blend. The weights were tuned on temperature-scaled probabilities,
    #    so the test probabilities get the same per-model temperatures first;
    #    otherwise the weights act on differently calibrated inputs.
    mix = row_normalize(
        w_b3 * temp_scale(b3_test, T_b3)
        + w_conv * temp_scale(conv_test, T_conv)
        + w_vit * temp_scale(vit_test, T_vit)
    )
    preds = mix.argmax(1)
    out = pd.DataFrame({"ID": sub["ID"], "target": preds})
    out_name = f"{OUT_DIR}/submission_v9_F{best_f1:.4f}.csv"
    out.to_csv(out_name, index=False)
    print(f"\n‚úÖ Saved submission ‚Üí {out_name}")


üöÄ Training 3 backbones with root-level v9 split
[b3] Ep01/50 | Tr=0.9795 Va=0.5965 F1=0.9000
[b3] Ep02/50 | Tr=0.4846 Va=0.5142 F1=0.9341
[b3] Ep03/50 | Tr=0.4035 Va=0.5139 F1=0.9406
[b3] Ep04/50 | Tr=0.3805 Va=0.5250 F1=0.9394
[b3] Ep05/50 | Tr=0.3695 Va=0.5414 F1=0.9368
[b3] Ep06/50 | Tr=0.3583 Va=0.5085 F1=0.9424
[b3] Ep07/50 | Tr=0.3508 Va=0.5195 F1=0.9400
[b3] Ep08/50 | Tr=0.3500 Va=0.5310 F1=0.9409
[b3] Ep09/50 | Tr=0.3445 Va=0.5206 F1=0.9455
[b3] Ep10/50 | Tr=0.3440 Va=0.5118 F1=0.9406
[b3] Ep11/50 | Tr=0.3405 Va=0.5303 F1=0.9375
[b3] Ep12/50 | Tr=0.3393 Va=0.5131 F1=0.9423
[b3] Ep13/50 | Tr=0.3411 Va=0.5165 F1=0.9420
[b3] Ep14/50 | Tr=0.3353 Va=0.5188 F1=0.9439
[b3] Ep15/50 | Tr=0.3365 Va=0.5124 F1=0.9499
[b3] Ep16/50 | Tr=0.3354 Va=0.5024 F1=0.9476
[b3] Ep17/50 | Tr=0.3313 Va=0.4907 F1=0.9549
[b3] Ep18/50 | Tr=0.3290 Va=0.4905 F1=0.9524
[b3] Ep19/50 | Tr=0.3335 Va=0.5471 F1=0.9375
[b3] Ep20/50 | Tr=0.3335 Va=0.4964 F1=0.9463
[b3] Ep21/50 | Tr=0.3289 Va=0.5213 F1=0.9437
[b3]