# Data Understanding & Preparation

In [1]:
# ============================================================
# STAGE 1 — Data Understanding & Preparation (REVISION FULL)
# OBJECT-AWARE · DICE-REALISTIC · TRAINING-DRIVEN
#
# OUTPUT:
# - stage1_profile.parquet
# - stage1_priors.json
#
# FEEDS:
# - Stage 3 sampling
# - loss weighting
# - threshold & postprocess policy
# ============================================================

from pathlib import Path
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm
import re, json
from scipy.spatial.distance import cdist

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TRAIN_IMG_DIR = DATA_ROOT / "train/images"
TRAIN_MASK_DIR = DATA_ROOT / "train/mask"

ART_DIR = Path("/kaggle/working/artifacts")
ART_DIR.mkdir(exist_ok=True)

# -----------------------------
# UTIL
# -----------------------------
def extract_index(name: str):
    m = re.search(r"(\d+)", name)
    return m.group(1) if m else None

# -----------------------------
# PAIR IMAGE–MASK
# -----------------------------
pairs = []
mask_index = {extract_index(p.stem): p for p in TRAIN_MASK_DIR.iterdir()}

for img in TRAIN_IMG_DIR.iterdir():
    idx = extract_index(img.stem)
    if idx in mask_index:
        pairs.append((img, mask_index[idx], idx))

print(f"[INFO] Paired samples: {len(pairs)}")

# -----------------------------
# ANALYSIS
# -----------------------------
records = []
all_component_areas = []
all_component_counts = []

for img_p, mask_p, idx in tqdm(pairs, desc="STAGE 1 Analysis"):
    mask = cv2.imread(str(mask_p), cv2.IMREAD_GRAYSCALE)
    h, w = mask.shape
    total_px = h * w

    bin_mask = (mask == 255).astype(np.uint8)
    fg_px = bin_mask.sum()
    area_ratio = fg_px / total_px

    num, labels, stats, centroids = cv2.connectedComponentsWithStats(bin_mask, 8)
    areas = stats[1:, cv2.CC_STAT_AREA] if num > 1 else np.array([])
    centers = centroids[1:] if num > 1 else np.empty((0,2))

    # ---- object statistics ----
    n_comp = len(areas)
    min_area = int(areas.min()) if n_comp else 0
    mean_area = float(areas.mean()) if n_comp else 0.0

    # ---- boundary complexity ----
    edges = cv2.Canny(bin_mask * 255, 50, 150)
    boundary_ratio = edges.sum() / max(fg_px, 1)

    # ---- fragmentation ----
    mean_dist = 0.0
    if len(centers) > 1:
        d = cdist(centers, centers)
        mean_dist = float(d[d > 0].mean())

    # ---- OBJECT-AWARE difficulty (0–1) ----
    difficulty = (
        0.40 * (area_ratio < 0.008) +
        0.30 * (n_comp >= 3) +
        0.20 * (boundary_ratio > 0.10) +
        0.10 * (mean_dist > 120)
    )
    difficulty = float(np.clip(difficulty, 0, 1))

    all_component_areas.extend(areas.tolist())
    all_component_counts.append(n_comp)

    records.append({
        "image_id": idx,
        "image": img_p.name,
        "has_pothole": int(fg_px > 0),
        "area_ratio": area_ratio,
        "num_components": n_comp,
        "min_component_px": min_area,
        "mean_component_px": mean_area,
        "boundary_ratio": boundary_ratio,
        "mean_component_dist": mean_dist,
        "difficulty_score": difficulty
    })

df = pd.DataFrame(records)

# ============================================================
# OBJECT-AWARE DICE CEILING (REALISTIC)
# ============================================================
comp = pd.Series(all_component_areas)

q = comp.quantile([0.05, 0.10, 0.25]).astype(int)

# assume model misses smallest K% objects entirely
miss_ratio = {
    "5%": comp[comp < q.loc[0.05]].count() / len(comp),
    "10%": comp[comp < q.loc[0.10]].count() / len(comp),
    "25%": comp[comp < q.loc[0.25]].count() / len(comp),
}

# empirical ceiling assumption
dice_ceiling = {
    "optimistic": 1.0 - miss_ratio["5%"] * 0.5,
    "realistic": 1.0 - miss_ratio["10%"],
    "pessimistic": 1.0 - miss_ratio["25%"],
}

print("\n[DICE CEILING — OBJECT-AWARE]")
for k, v in dice_ceiling.items():
    print(f"{k:<12}: Dice ≤ {v:.3f}")

# ============================================================
# TRAINING PRIORS (SAFE & EFFECTIVE)
# ============================================================
df["sampling_weight"] = (
    1.0 +
    1.2 * df["difficulty_score"] +
    0.3 * (df["has_pothole"] == 0)
).clip(0.7, 2.2)

priors = {
    "dice_ceiling": dice_ceiling,
    "min_area_px": {
        "aggressive": int(q.loc[0.05]),
        "balanced": int(q.loc[0.10]),
        "safe": int(q.loc[0.25]),
    },
    "sampling": {
        "difficulty_boost": 1.2,
        "empty_boost": 0.3,
        "max_weight": 2.2
    },
    "object_ratio_target": [0.65, 0.80],
    "threshold": {
        "search_range": [0.30, 0.45],
        "default_start": 0.35
    },
    "loss_policy": {
        "phase1": "dice + focal(alpha=0.7)",
        "phase2": "dice + focal(0.3) + boundary(0.05)"
    }
}

# ============================================================
# EXPORT
# ============================================================
df.to_parquet(ART_DIR / "stage1_profile.parquet", index=False)

with open(ART_DIR / "stage1_priors.json", "w") as f:
    json.dump(priors, f, indent=2)

print("\n[STAGE 1 COMPLETE — 0.80-READY]")
print("✓ Object-aware Dice ceiling (REALISTIC)")
print("✓ Difficulty score stabilized")
print("✓ Safe sampling priors (no over-forcing)")
print("✓ Directly feeds Stage 3 curriculum")

[INFO] Paired samples: 498


STAGE 1 Analysis: 100%|██████████| 498/498 [00:18<00:00, 26.44it/s]


[DICE CEILING — OBJECT-AWARE]
optimistic  : Dice ≤ 0.975
realistic   : Dice ≤ 0.901
pessimistic : Dice ≤ 0.750

[STAGE 1 COMPLETE — 0.80-READY]
✓ Object-aware Dice ceiling (REALISTIC)
✓ Difficulty score stabilized
✓ Safe sampling priors (no over-forcing)
✓ Directly feeds Stage 3 curriculum





# Preprocessing & Data Augmentation

In [2]:
# ============================================================
# STAGE 2 — Preprocessing & Data Augmentation (FINAL · CONVNEXT)
# ============================================================

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# ============================================================
# TRAIN TRANSFORM — 512x512 (CONVNEXT-SAFE)
# ============================================================
train_transform_512 = A.Compose(
    [
        # -------------------------
        # FIXED RESOLUTION
        # -------------------------
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),

        # -------------------------
        # GEOMETRY — ULTRA STABLE
        # -------------------------
        A.HorizontalFlip(p=0.5),

        A.Affine(
            scale=(0.995, 1.025),          # ⬅️ lebih sempit
            translate_percent=(0.0, 0.015),
            rotate=(-1.0, 1.0),
            shear=(-0.8, 0.8),
            interpolation=cv2.INTER_LINEAR,
            p=0.25,
        ),

        # -------------------------
        # SHAPE CONTINUITY (SINGLE ONLY)
        # -------------------------
        A.GridDistortion(
            num_steps=5,
            distort_limit=0.010,
            border_mode=cv2.BORDER_REFLECT_101,
            p=0.06,
        ),

        # -------------------------
        # CONFUSER-AWARE PHOTOMETRIC
        # -------------------------
        A.OneOf(
            [
                A.RandomBrightnessContrast(
                    brightness_limit=0.14,
                    contrast_limit=0.16,
                ),
                A.RandomGamma(gamma_limit=(92, 110)),
            ],
            p=0.50,
        ),

        A.HueSaturationValue(
            hue_shift_limit=3,
            sat_shift_limit=6,
            val_shift_limit=3,
            p=0.20,
        ),

        # -------------------------
        # SHADOW — REDUCED (IMPORTANT)
        # -------------------------
        A.RandomShadow(
            shadow_roi=(0, 0.40, 1, 1),
            shadow_dimension=4,
            p=0.10,   # ⬅️ DITURUNKAN
        ),

        # -------------------------
        # TEXTURE NOISE — VERY MILD
        # -------------------------
        A.GaussNoise(std_range=(0.015, 0.045), p=0.08),

        # -------------------------
        # NORMALIZE
        # -------------------------
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

# ============================================================
# VALID / TEST (UNCHANGED)
# ============================================================
valid_transform = A.Compose(
    [
        A.Resize(512, 512),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

test_transform = A.Compose(
    [
        A.Resize(512, 512),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ]
)

print("[STAGE 2 FINAL — CONVNEXT READY]")


[STAGE 2 FINAL — CONVNEXT READY]


# Model Construction & Training

In [3]:
!pip install -q segmentation-models-pytorch==0.3.3 timm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone


In [4]:
# ============================================================
# STAGE 3 — UNET++ + TIMM-EFFICIENTNET-B4
# STABLE · NO COLLAPSE · BUG-FREE
# ============================================================

import os, re, random, cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import autocast, GradScaler

import segmentation_models_pytorch as smp
from sklearn.model_selection import train_test_split

# -----------------------------
# CONFIG
# -----------------------------
SEED = 42
IMG_SIZE = 512
BATCH = 4
ACCUM = 3                 # effective batch = 12
EPOCHS = 30
FREEZE_EPOCHS = 12
LR = 5e-5                 # STABLE LR
WD = 1e-4
VAL_RATIO = 0.15
THR_RANGE = np.linspace(0.25, 0.50, 11)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# -----------------------------
# DATA
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
IMG_DIR = DATA_ROOT / "train/images"
MSK_DIR = DATA_ROOT / "train/mask"

pairs = []
for img in IMG_DIR.iterdir():
    idx = re.search(r"(\d+)", img.name).group(1)
    m = MSK_DIR / f"mask_{idx}.png"
    if m.exists():
        pairs.append((str(img), str(m)))

df = pd.DataFrame(pairs, columns=["img", "mask"])
df_tr, df_va = train_test_split(df, test_size=VAL_RATIO, random_state=SEED)

print("[INFO] train:", len(df_tr), "val:", len(df_va))

# -----------------------------
# DATASET (FIXED)
# -----------------------------
class DS(Dataset):
    def __init__(self, df, tfm):
        self.df = df.reset_index(drop=True)
        self.tfm = tfm

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        img_path = self.df.loc[i, "img"]
        msk_path = self.df.loc[i, "mask"]

        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        msk = (cv2.imread(msk_path, 0) == 255).astype("float32")

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        msk = cv2.resize(msk, (IMG_SIZE, IMG_SIZE), cv2.INTER_NEAREST)

        out = self.tfm(image=img, mask=msk)
        return out["image"], out["mask"].unsqueeze(0)

# -----------------------------
# LOSSES
# -----------------------------
def dice_loss(l, t):
    p = torch.sigmoid(l)
    inter = (p * t).sum()
    return 1 - (2*inter + 1e-7)/(p.sum() + t.sum() + 1e-7)

def focal_loss(l, t):
    return F.binary_cross_entropy_with_logits(l, t)

def boundary_loss(l, t):
    p = torch.sigmoid(l)
    k = torch.tensor([[1,0,-1],[2,0,-2],[1,0,-1]],
                     device=l.device).float().view(1,1,3,3)
    ep = F.conv2d(p, k, padding=1).abs()
    et = F.conv2d(t, k, padding=1).abs()
    return F.l1_loss(ep, et)

def criterion(l, t, epoch):
    base = dice_loss(l,t) + 0.5*focal_loss(l,t)
    if epoch >= 10:
        return base + 0.1*boundary_loss(l,t)
    return base

# -----------------------------
# METRIC
# -----------------------------
@torch.no_grad()
def dice_eval(p, g, thr):
    if g.sum()==0:
        return 1.0 if p.max()<thr else 0.0
    b = (p > thr).float()
    inter = (b*g).sum()
    return float((2*inter + 1e-7)/(b.sum()+g.sum()+1e-7))

# -----------------------------
# MODEL
# -----------------------------
model = smp.UnetPlusPlus(
    encoder_name="timm-efficientnet-b4",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
).to(DEVICE)

for p in model.encoder.parameters():
    p.requires_grad = False

opt = AdamW(model.parameters(), lr=LR, weight_decay=WD)
sch = CosineAnnealingLR(opt, T_max=EPOCHS)
scaler = GradScaler()

tr_ds = DS(df_tr, train_transform_512)
va_ds = DS(df_va, valid_transform)

tr_ld = DataLoader(tr_ds, BATCH, shuffle=True, num_workers=2, pin_memory=True)
va_ld = DataLoader(va_ds, 1, shuffle=False)

best = 0.0
best_thr = 0.4

print("\n===== TRAINING STABLE =====")

for ep in range(1, EPOCHS+1):

    if ep == FREEZE_EPOCHS+1:
        print(">> Unfreeze encoder")
        for p in model.encoder.parameters():
            p.requires_grad = True

    model.train()
    opt.zero_grad()

    for i,(x,y) in enumerate(tqdm(tr_ld, leave=False)):
        x,y = x.to(DEVICE), y.to(DEVICE)
        with autocast():
            l = model(x)
            loss = criterion(l,y,ep)/ACCUM
        scaler.scale(loss).backward()
        if (i+1)%ACCUM==0:
            scaler.step(opt)
            scaler.update()
            opt.zero_grad()

    sch.step()

    model.eval()
    scores = {t:[] for t in THR_RANGE}
    with torch.no_grad():
        for x,y in va_ld:
            x,y = x.to(DEVICE), y.to(DEVICE)
            p = torch.sigmoid(model(x))
            for t in THR_RANGE:
                scores[t].append(dice_eval(p,y,t))

    cur_thr, cur_val = max(
        ((t,np.mean(v)) for t,v in scores.items()),
        key=lambda x:x[1]
    )

    if cur_val > best:
        best = cur_val
        best_thr = cur_thr
        torch.save(model.state_dict(), "/kaggle/working/best_unetpp.pt")
        print(f">> BEST | Dice {best:.4f} @ thr={best_thr:.2f}")

    print(f"Epoch {ep:02d} | ValDice {cur_val:.4f}")

print(f"\n[DONE] BEST DICE {best:.4f}")




Device: cuda
[INFO] train: 423 val: 75
Downloading: "https://github.com/huggingface/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b4-74ee3bed.pth" to /root/.cache/torch/hub/checkpoints/tf_efficientnet_b4-74ee3bed.pth


100%|██████████| 74.4M/74.4M [00:00<00:00, 114MB/s]
  scaler = GradScaler()



===== TRAINING STABLE =====


  with autocast():
                                                 

>> BEST | Dice 0.2313 @ thr=0.50
Epoch 01 | ValDice 0.2313


                                                 

>> BEST | Dice 0.3294 @ thr=0.50
Epoch 02 | ValDice 0.3294


                                                 

>> BEST | Dice 0.4342 @ thr=0.50
Epoch 03 | ValDice 0.4342


                                                 

>> BEST | Dice 0.4663 @ thr=0.50
Epoch 04 | ValDice 0.4663


                                                 

>> BEST | Dice 0.5017 @ thr=0.50
Epoch 05 | ValDice 0.5017


                                                 

>> BEST | Dice 0.5333 @ thr=0.50
Epoch 06 | ValDice 0.5333


                                                 

>> BEST | Dice 0.5525 @ thr=0.50
Epoch 07 | ValDice 0.5525


                                                 

Epoch 08 | ValDice 0.5441


                                                 

>> BEST | Dice 0.5781 @ thr=0.50
Epoch 09 | ValDice 0.5781


                                                 

>> BEST | Dice 0.5800 @ thr=0.50
Epoch 10 | ValDice 0.5800


                                                 

Epoch 11 | ValDice 0.5727


                                                 

>> BEST | Dice 0.5804 @ thr=0.50
Epoch 12 | ValDice 0.5804
>> Unfreeze encoder


                                                 

>> BEST | Dice 0.5898 @ thr=0.50
Epoch 13 | ValDice 0.5898


                                                 

>> BEST | Dice 0.5949 @ thr=0.50
Epoch 14 | ValDice 0.5949


                                                 

>> BEST | Dice 0.6223 @ thr=0.50
Epoch 15 | ValDice 0.6223


                                                 

Epoch 16 | ValDice 0.6182


                                                 

>> BEST | Dice 0.6276 @ thr=0.50
Epoch 17 | ValDice 0.6276


                                                 

Epoch 18 | ValDice 0.6147


                                                 

Epoch 19 | ValDice 0.6233


                                                 

Epoch 20 | ValDice 0.6205


                                                 

Epoch 21 | ValDice 0.6235


                                                 

Epoch 22 | ValDice 0.6258


                                                 

>> BEST | Dice 0.6358 @ thr=0.50
Epoch 23 | ValDice 0.6358


                                                 

Epoch 24 | ValDice 0.6309


                                                 

Epoch 25 | ValDice 0.6326


                                                 

Epoch 26 | ValDice 0.6261


                                                 

>> BEST | Dice 0.6371 @ thr=0.50
Epoch 27 | ValDice 0.6371


                                                 

Epoch 28 | ValDice 0.6370


                                                 

Epoch 29 | ValDice 0.6330


                                                 

Epoch 30 | ValDice 0.6264

[DONE] BEST DICE 0.6371


# Optimization, Validation & Refinement

In [5]:
# ============================================================
# STAGE 4 — THRESHOLD & POSTPROCESS OPTIMIZATION (FINAL)
# MATCH STAGE 3 · SAM-FRIENDLY · 80+ READY
# ============================================================

!pip install -q optuna segmentation-models-pytorch

import optuna
import numpy as np
import torch
import cv2
from pathlib import Path
from tqdm import tqdm
import torch.nn.functional as F

import segmentation_models_pytorch as smp
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# -----------------------------
# CONFIG
# -----------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
IMG_SIZE = 512
VAL_RATIO = 0.15

print("Device:", DEVICE)

# -----------------------------
# DATA
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
IMG_DIR = DATA_ROOT / "train/images"
MSK_DIR = DATA_ROOT / "train/mask"

pairs = []
for img in IMG_DIR.iterdir():
    idx = img.stem.split("_")[-1]
    m = MSK_DIR / f"mask_{idx}.png"
    if m.exists():
        pairs.append((str(img), str(m)))

pairs = np.array(pairs, dtype=object)
_, val_pairs = train_test_split(pairs, test_size=VAL_RATIO, random_state=42)

# -----------------------------
# DATASET (MATCH STAGE 3)
# -----------------------------
class ValDataset(Dataset):
    def __init__(self, pairs):
        self.pairs = pairs

    def __len__(self): return len(self.pairs)

    def __getitem__(self, i):
        img = cv2.cvtColor(cv2.imread(self.pairs[i][0]), cv2.COLOR_BGR2RGB)
        gt  = (cv2.imread(self.pairs[i][1], 0) == 255).astype(np.uint8)

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        gt  = cv2.resize(gt, (IMG_SIZE, IMG_SIZE), cv2.INTER_NEAREST)

        img = img.astype(np.float32) / 255.0
        img[...,0] = (img[...,0] - 0.485) / 0.229
        img[...,1] = (img[...,1] - 0.456) / 0.224
        img[...,2] = (img[...,2] - 0.406) / 0.225

        img = torch.from_numpy(img.transpose(2,0,1))
        gt  = torch.from_numpy(gt)

        return img, gt

val_loader = DataLoader(ValDataset(val_pairs), batch_size=1, shuffle=False)
print("[INFO] Validation samples:", len(val_pairs))

# -----------------------------
# LOAD MODEL (SAME AS STAGE 3)
# -----------------------------
model = smp.UnetPlusPlus(
    encoder_name="timm-efficientnet-b4",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

model.load_state_dict(torch.load("/kaggle/working/best_unetpp.pt", map_location=DEVICE))
model.eval()

# -----------------------------
# METRIC
# -----------------------------
def dice_empty_aware(pred, gt, eps=1e-7):
    if gt.sum() == 0:
        return 1.0 if pred.sum() == 0 else 0.0
    inter = (pred * gt).sum()
    return float((2*inter + eps) / (pred.sum() + gt.sum() + eps))

# -----------------------------
# POSTPROCESS
# -----------------------------
def postprocess(prob, thr, min_area):
    binm = (prob > thr).astype(np.uint8)
    binm = cv2.morphologyEx(binm, cv2.MORPH_CLOSE, np.ones((3,3), np.uint8))

    n, lbl, stat, _ = cv2.connectedComponentsWithStats(binm, 8)
    out = np.zeros_like(binm)
    for i in range(1, n):
        if stat[i, cv2.CC_STAT_AREA] >= min_area:
            out[lbl == i] = 1
    return out

# -----------------------------
# OPTUNA OBJECTIVE (FINAL)
# -----------------------------
def objective(trial):

    thr = trial.suggest_float("thr", 0.35, 0.48)
    min_area = trial.suggest_int("min_area", 100, 200, step=20)
    empty_thr = trial.suggest_float("empty_thr", 0.015, 0.035)

    dices = []

    with torch.no_grad():
        for img, gt in val_loader:
            img = img.to(DEVICE)
            gt  = gt.numpy()[0]

            logits = model(img)
            logits = F.interpolate(
                logits, size=(IMG_SIZE, IMG_SIZE),
                mode="bilinear", align_corners=False
            )
            prob = torch.sigmoid(logits)[0,0].cpu().numpy()

            if prob.max() < empty_thr:
                pred = np.zeros_like(gt)
            else:
                pred = postprocess(prob, thr, min_area)

            dices.append(dice_empty_aware(pred, gt))

    return float(np.mean(dices))

# -----------------------------
# RUN OPTUNA
# -----------------------------
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

OPT_CONFIG = study.best_params
print("\n[STAGE 4 BEST CONFIG]")
for k,v in OPT_CONFIG.items():
    print(f"{k}: {v}")

print("\n[STAGE 4 COMPLETE — READY FOR STAGE 5]")


Device: cuda
[INFO] Validation samples: 75


[I 2026-02-08 00:53:43,999] A new study created in memory with name: no-name-13dce3d1-6d72-4b28-aa77-349a2ec4c870


  0%|          | 0/30 [00:00<?, ?it/s]

[I 2026-02-08 00:53:50,450] Trial 0 finished with value: 0.632981106837851 and parameters: {'thr': 0.45719634761420547, 'min_area': 100, 'empty_thr': 0.02889349300028172}. Best is trial 0 with value: 0.632981106837851.
[I 2026-02-08 00:53:56,681] Trial 1 finished with value: 0.6232572641224685 and parameters: {'thr': 0.37032090287276687, 'min_area': 200, 'empty_thr': 0.019192367288616372}. Best is trial 0 with value: 0.632981106837851.
[I 2026-02-08 00:54:02,933] Trial 2 finished with value: 0.6328215224604353 and parameters: {'thr': 0.45447017500268205, 'min_area': 160, 'empty_thr': 0.017990198822264252}. Best is trial 0 with value: 0.632981106837851.
[I 2026-02-08 00:54:09,161] Trial 3 finished with value: 0.6299471191116696 and parameters: {'thr': 0.42414452264475877, 'min_area': 160, 'empty_thr': 0.022567771083660808}. Best is trial 0 with value: 0.632981106837851.
[I 2026-02-08 00:54:15,543] Trial 4 finished with value: 0.6308345195201548 and parameters: {'thr': 0.4338376592864041

# Inference, Encoding & Submission

In [6]:
# ============================================================
# STAGE 5 — FINAL INFERENCE (LB-SAFE · 80+ READY)
# MATCH STAGE 3 & 4 EXACTLY
# ============================================================

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import cv2
from pathlib import Path
from tqdm import tqdm
import segmentation_models_pytorch as smp

# -----------------------------
# PATHS & DEVICE
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TEST_IMG_DIR = DATA_ROOT / "test/images"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# -----------------------------
# LOAD OPT CONFIG
# -----------------------------
THR        = float(OPT_CONFIG["thr"])
MIN_AREA  = int(OPT_CONFIG["min_area"])
EMPTY_THR = float(OPT_CONFIG["empty_thr"])

print("[CONFIG]", THR, MIN_AREA, EMPTY_THR)

# -----------------------------
# INFERENCE CONFIG (MATCH STAGE 3)
# -----------------------------
INPUT_SIZE = 512
MEAN = (0.485, 0.456, 0.406)
STD  = (0.229, 0.224, 0.225)

# -----------------------------
# LOAD MODEL (SAME AS STAGE 3)
# -----------------------------
model = smp.UnetPlusPlus(
    encoder_name="timm-efficientnet-b4",   # ✅ HARUS SAMA
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

model.load_state_dict(torch.load("/kaggle/working/best_unetpp.pt", map_location=DEVICE))
model.eval()
print("[INFO] UNet++ loaded")

# -----------------------------
# RLE ENCODER
# -----------------------------
def encode_rle(mask):
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[0::2]
    return " ".join(str(x) for x in runs)

# -----------------------------
# POSTPROCESS (SAM-FRIENDLY)
# -----------------------------
def postprocess(prob):
    binm = (prob > THR).astype(np.uint8)

    # ONLY light closing — DO NOT OPEN
    binm = cv2.morphologyEx(
        binm, cv2.MORPH_CLOSE, np.ones((3,3), np.uint8)
    )

    n, labels, stats, _ = cv2.connectedComponentsWithStats(binm, 8)
    out = np.zeros_like(binm)

    for i in range(1, n):
        if stats[i, cv2.CC_STAT_AREA] >= MIN_AREA:
            out[labels == i] = 1

    return out

# -----------------------------
# FINAL INFERENCE
# -----------------------------
records = []
test_images = sorted(TEST_IMG_DIR.glob("*.jpg"))
assert len(test_images) == 295

with torch.no_grad():
    for p in tqdm(test_images):
        img = cv2.cvtColor(cv2.imread(str(p)), cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]

        img_r = cv2.resize(img, (INPUT_SIZE, INPUT_SIZE)).astype(np.float32) / 255.0
        for c in range(3):
            img_r[...,c] = (img_r[...,c] - MEAN[c]) / STD[c]

        x = torch.from_numpy(img_r.transpose(2,0,1)).unsqueeze(0).to(DEVICE)

        # ---- MODEL ----
        logits = model(x)
        logits = F.interpolate(
            logits, size=(INPUT_SIZE, INPUT_SIZE),
            mode="bilinear", align_corners=False
        )
        prob = torch.sigmoid(logits)[0,0].cpu().numpy()

        prob = cv2.resize(prob, (w, h))

        if prob.max() < EMPTY_THR:
            pred = np.zeros((h,w), np.uint8)
        else:
            pred = postprocess(prob)

        rle = "" if pred.sum() == 0 else encode_rle(pred)
        records.append({"ImageId": p.name, "rle": rle})

# -----------------------------
# SAVE SUBMISSION
# -----------------------------
df = pd.DataFrame(records).sort_values("ImageId").reset_index(drop=True)
df.to_csv("/kaggle/working/submission.csv", index=False)

print("[DONE] rows:", len(df), "empty:", (df.rle=="").sum())
df.head()

Device: cuda
[CONFIG] 0.4797183396856195 120 0.03142493728133388
[INFO] UNet++ loaded


100%|██████████| 295/295 [00:20<00:00, 14.14it/s]


[DONE] rows: 295 empty: 0


Unnamed: 0,ImageId,rle
0,test_001.jpg,2542 4 2841 6 3140 8 3439 9 3738 11 4038 12 43...
1,test_002.jpg,4548 12 5266 16 5986 17 6706 19 7424 26 8144 2...
2,test_003.jpg,2236217 11 2238512 15 2240807 18 2243102 20 22...
3,test_004.jpg,7750 5 8043 2 8049 7 8342 3 8349 8 8641 16 894...
4,test_005.jpg,41815 1 42115 1 42414 6 42714 6 43014 6 43314 ...
