# Data Understanding & Preparation

In [1]:
# ============================================================
# STAGE 1 — Data Understanding & Preparation (REVISED · LB-READY)
# Purpose:
# - Validate dataset integrity
# - Quantify Dice risk factors (empty / tiny / fragmented)
# - Derive data-driven priors for:
#   • sampling strategy
#   • min-area postprocess
#   • threshold sweep
# - Produce manifest for downstream stages
# ============================================================

from pathlib import Path
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm
import re

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TRAIN_IMG_DIR = DATA_ROOT / "train/images"
TRAIN_MASK_DIR = DATA_ROOT / "train/mask"
TEST_IMG_DIR  = DATA_ROOT / "test/images"

IMG_EXTS = {".jpg", ".jpeg", ".png"}

# -----------------------------
# 1. LOAD FILES
# -----------------------------
train_images = sorted([p for p in TRAIN_IMG_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])
train_masks  = sorted([p for p in TRAIN_MASK_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])
test_images  = sorted([p for p in TEST_IMG_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])

print(f"[INFO] Train images : {len(train_images)}")
print(f"[INFO] Train masks  : {len(train_masks)}")
print(f"[INFO] Test images  : {len(test_images)}")

# -----------------------------
# 2. INDEX MASKS
# -----------------------------
def extract_index(name: str):
    m = re.search(r"(\d+)", name)
    return m.group(1) if m else None

mask_index = {extract_index(m.stem): m for m in train_masks if extract_index(m.stem)}

# -----------------------------
# 3. PAIR IMAGE–MASK
# -----------------------------
pairs = []
for img in train_images:
    idx = extract_index(img.stem)
    if idx in mask_index:
        pairs.append({
            "image_path": img,
            "mask_path": mask_index[idx],
            "id": idx
        })

assert len(pairs) > 0
print(f"[INFO] Valid image-mask pairs: {len(pairs)}")

# -----------------------------
# 4. MORPHOLOGY & DICE-RISK ANALYSIS
# -----------------------------
records = []
all_component_areas = []

for p in tqdm(pairs, desc="Analyzing dataset"):
    mask = cv2.imread(str(p["mask_path"]), cv2.IMREAD_GRAYSCALE)
    h, w = mask.shape
    total_pixels = h * w

    bin_mask = (mask == 255).astype(np.uint8)
    pothole_pixels = bin_mask.sum()
    area_ratio = pothole_pixels / total_pixels

    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(
        bin_mask, connectivity=8
    )

    component_areas = stats[1:, cv2.CC_STAT_AREA] if num_labels > 1 else []
    if len(component_areas) > 0:
        all_component_areas.extend(component_areas.tolist())

    # bucket for stratified sampling
    if pothole_pixels == 0:
        bucket = "empty"
    elif area_ratio < 0.002:
        bucket = "tiny"
    elif area_ratio < 0.01:
        bucket = "small"
    elif area_ratio < 0.05:
        bucket = "medium"
    else:
        bucket = "large"

    records.append({
        "image": p["image_path"].name,
        "image_path": str(p["image_path"]),
        "mask_path": str(p["mask_path"]),
        "height": h,
        "width": w,
        "has_pothole": int(pothole_pixels > 0),
        "area_ratio": area_ratio,
        "total_pothole_pixels": pothole_pixels,
        "num_components": len(component_areas),
        "max_component_pixels": component_areas.max() if len(component_areas) > 0 else 0,
        "bucket": bucket,
    })

df = pd.DataFrame(records)

# -----------------------------
# 5. CORE INSIGHTS
# -----------------------------
print("\n[INSIGHT] Pothole presence:")
print(df["has_pothole"].value_counts())

print("\n[INSIGHT] Area ratio (% image):")
print(df["area_ratio"].describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]))

print("\n[INSIGHT] Bucket distribution:")
print(df["bucket"].value_counts(normalize=True).round(3))

# -----------------------------
# 6. SMALL OBJECT ANALYSIS (POSTPROCESS PRIOR)
# -----------------------------
comp_series = pd.Series(all_component_areas)

print("\n[INSIGHT] Connected component area (px):")
print(comp_series.describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]))

MIN_AREA_PX = int(comp_series.quantile(0.10))
print(f"\n[LOCKED PRIOR] MIN_AREA_PX ≈ {MIN_AREA_PX}")

# -----------------------------
# 7. DICE FEASIBILITY SIGNAL
# -----------------------------
tiny_ratio = (df["area_ratio"] < 0.01).mean()
print(f"\n[FEASIBILITY] <1% area images: {tiny_ratio:.2%}")

if tiny_ratio > 0.6:
    feasibility = "HARD"
elif tiny_ratio > 0.4:
    feasibility = "MODERATE"
else:
    feasibility = "FAVORABLE"

print(f"[FEASIBILITY STATUS] {feasibility}")

# -----------------------------
# 8. THRESHOLD PRIOR
# -----------------------------
THR_START, THR_END = 0.30, 0.45
print("\n[LOCKED THRESHOLD PRIOR]")
print(f"Use sweep range: {THR_START:.2f} – {THR_END:.2f}")

# -----------------------------
# 9. FINAL MANIFEST (DOWNSTREAM READY)
# -----------------------------
df_manifest = df[[
    "image_path",
    "mask_path",
    "has_pothole",
    "area_ratio",
    "bucket"
]].copy()

print(f"\n[INFO] Final training samples: {len(df_manifest)}")

print("\n[STAGE 1 COMPLETE — LB-READY]")
print("✓ Dataset validated")
print("✓ Sampling buckets defined")
print("✓ Min-area & threshold locked")
print("✓ Manifest ready for STAGE 2/3")


[INFO] Train images : 498
[INFO] Train masks  : 498
[INFO] Test images  : 295
[INFO] Valid image-mask pairs: 498


Analyzing dataset: 100%|██████████| 498/498 [00:11<00:00, 44.37it/s]



[INSIGHT] Pothole presence:
has_pothole
1    498
Name: count, dtype: int64

[INSIGHT] Area ratio (% image):
count    498.000000
mean       0.134860
std        0.128772
min        0.000235
10%        0.007938
25%        0.040943
50%        0.091678
75%        0.193834
90%        0.329536
max        0.674005
Name: area_ratio, dtype: float64

[INSIGHT] Bucket distribution:
bucket
large     0.691
medium    0.191
small     0.066
tiny      0.052
Name: proportion, dtype: float64

[INSIGHT] Connected component area (px):
count    2.122000e+03
mean     5.588544e+04
std      3.030841e+05
min      1.000000e+00
10%      1.301000e+02
25%      3.930000e+02
50%      1.913000e+03
75%      1.203275e+04
90%      5.370160e+04
max      6.700584e+06
dtype: float64

[LOCKED PRIOR] MIN_AREA_PX ≈ 130

[FEASIBILITY] <1% area images: 11.85%
[FEASIBILITY STATUS] FAVORABLE

[LOCKED THRESHOLD PRIOR]
Use sweep range: 0.30 – 0.45

[INFO] Final training samples: 498

[STAGE 1 COMPLETE — LB-READY]
✓ Dataset validated

# Preprocessing & Data Augmentation

In [2]:
# ============================================================
# STAGE 2 — Preprocessing & Data Augmentation (FINAL · 0.80+)
# ============================================================

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# ============================================================
# TRAIN AUGMENTATION — MASK-AWARE & SHAPE-ROBUST
# ============================================================
train_transform_512 = A.Compose(
    [
        # --- FIXED RESOLUTION ---
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),

        # --- MASK-AWARE SPATIAL FOCUS (CRITICAL) ---
        A.CropNonEmptyMaskIfExists(
            height=448,
            width=448,
            p=0.40,
        ),
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),

        # --- SAFE GEOMETRY ---
        A.HorizontalFlip(p=0.5),

        A.Affine(
            scale=(0.97, 1.05),
            translate_percent=(0.0, 0.03),
            rotate=(-2.5, 2.5),
            shear=(-1.5, 1.5),
            interpolation=cv2.INTER_LINEAR,
            mode=cv2.BORDER_REFLECT_101,
            p=0.30,
        ),

        # --- SHAPE DEFORMATION (KEY FOR 0.80) ---
        A.ElasticTransform(
            alpha=20,
            sigma=6,
            alpha_affine=4,
            border_mode=cv2.BORDER_REFLECT_101,
            p=0.25,
        ),

        # --- PHOTOMETRIC ---
        A.RandomBrightnessContrast(
            brightness_limit=0.18,
            contrast_limit=0.18,
            p=0.65,
        ),

        A.HueSaturationValue(
            hue_shift_limit=5,
            sat_shift_limit=10,
            val_shift_limit=5,
            p=0.30,
        ),

        # --- SHADOW (CONSERVATIVE) ---
        A.RandomShadow(
            shadow_roi=(0, 0.6, 1, 1),
            num_shadows_lower=1,
            num_shadows_upper=1,
            shadow_dimension=4,
            p=0.15,
        ),

        # --- VERY MILD NOISE ---
        A.OneOf(
            [
                A.GaussianBlur(blur_limit=3),
                A.GaussNoise(var_limit=(4.0, 12.0)),
            ],
            p=0.12,
        ),

        # --- NORMALIZE ---
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

# ============================================================
# VALID / TEST (STRICT)
# ============================================================
valid_transform = A.Compose(
    [
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

test_transform = A.Compose(
    [
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ]
)

print("[STAGE 2 FINAL — SHAPE-AWARE & 0.80-READY]")


[STAGE 2 FINAL — SHAPE-AWARE & 0.80-READY]


  A.Affine(
  A.ElasticTransform(
  A.RandomShadow(
  A.GaussNoise(var_limit=(4.0, 12.0)),


# Model Construction & Training

In [3]:
!pip install -q segmentation-models-pytorch==0.3.3 timm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m37.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone


In [4]:
# ============================================================
# STAGE 3 — PATCH + FULL IMAGE TRAINING (FINAL · 0.80 CORE)
# UNet++ + EfficientNet-B5 | 768 RES | AMP SAFE | ONE CELL
# ============================================================

import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import random, re, cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import autocast, GradScaler

import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split

# -----------------------------
# CONFIG
# -----------------------------
SEED = 42
IMG_SIZE = 768
PATCH = 384

PATCH_EPOCHS = 25
FULL_EPOCHS  = 18

PATCH_BATCH = 4
FULL_BATCH  = 2
ACCUM = 2

LR_PATCH = 3e-4
LR_FULL  = 1e-5

FREEZE_EPOCHS = 10
THR_RANGE = np.linspace(0.35, 0.50, 7)

DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
IMG_DIR = DATA_ROOT / "train/images"
MSK_DIR = DATA_ROOT / "train/mask"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

scaler = GradScaler()

print("Device:", DEVICE)

# ============================================================
# BUILD MANIFEST
# ============================================================
def extract_idx(name):
    return re.search(r"(\d+)", name).group(1)

pairs = []
for img in IMG_DIR.iterdir():
    idx = extract_idx(img.name)
    msk = MSK_DIR / f"mask_{idx}.png"
    if msk.exists():
        pairs.append((str(img), str(msk)))

df = pd.DataFrame(pairs, columns=["image_path", "mask_path"])
df_train, df_val = train_test_split(
    df, test_size=0.15, random_state=SEED, shuffle=True
)

print("Train:", len(df_train), "| Val:", len(df_val))

# ============================================================
# DATASETS
# ============================================================
class PatchDataset(Dataset):
    def __init__(self, df, tf):
        self.df = df.reset_index(drop=True)
        self.tf = tf

    def __len__(self):
        return len(self.df) * 3

    def _safe_patch(self, img, mask, x1, y1):
        h, w = img.shape[:2]
        x1 = max(0, min(x1, w - PATCH))
        y1 = max(0, min(y1, h - PATCH))
        img_c = img[y1:y1+PATCH, x1:x1+PATCH]
        mask_c = mask[y1:y1+PATCH, x1:x1+PATCH]
        if img_c.shape[:2] != (PATCH, PATCH):
            img_c = cv2.resize(img, (PATCH, PATCH))
            mask_c = cv2.resize(mask, (PATCH, PATCH),
                                interpolation=cv2.INTER_NEAREST)
        return img_c, mask_c

    def __getitem__(self, idx):
        row = self.df.iloc[idx % len(self.df)]
        img = cv2.imread(row.image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(row.mask_path, cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype(np.uint8)

        if mask.sum() > 0 and random.random() < 0.7:
            ys, xs = np.where(mask > 0)
            i = random.randint(0, len(xs)-1)
            img, mask = self._safe_patch(
                img, mask,
                xs[i] - PATCH//2,
                ys[i] - PATCH//2
            )
        else:
            h, w = img.shape[:2]
            if h >= PATCH and w >= PATCH:
                x1 = random.randint(0, w - PATCH)
                y1 = random.randint(0, h - PATCH)
                img, mask = self._safe_patch(img, mask, x1, y1)
            else:
                img = cv2.resize(img, (PATCH, PATCH))
                mask = cv2.resize(mask, (PATCH, PATCH),
                                  interpolation=cv2.INTER_NEAREST)

        aug = self.tf(image=img, mask=mask)
        return aug["image"], aug["mask"].unsqueeze(0).float()


class FullDataset(Dataset):
    def __init__(self, df, tf):
        self.df = df.reset_index(drop=True)
        self.tf = tf

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = cv2.imread(row.image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(row.mask_path, cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype(np.float32)
        aug = self.tf(image=img, mask=mask)
        return aug["image"], aug["mask"].unsqueeze(0)

# ============================================================
# TRANSFORMS
# ============================================================
norm = dict(mean=(0.485,0.456,0.406),
            std=(0.229,0.224,0.225))

patch_tf = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(0.15, 0.15, p=0.5),
    A.Normalize(**norm),
    ToTensorV2(),
])

full_train_tf = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.HorizontalFlip(p=0.5),
    A.Normalize(**norm),
    ToTensorV2(),
])

full_val_tf = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(**norm),
    ToTensorV2(),
])

# ============================================================
# MODEL & LOSS
# ============================================================
model = smp.UnetPlusPlus(
    encoder_name="efficientnet-b5",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
).to(DEVICE)

dice  = smp.losses.DiceLoss(mode="binary", from_logits=True)
focal = smp.losses.FocalLoss(mode="binary", gamma=2.0)

# ============================================================
# PHASE A — PATCH TRAINING
# ============================================================
patch_loader = DataLoader(
    PatchDataset(df_train, patch_tf),
    batch_size=PATCH_BATCH,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

opt = AdamW(model.parameters(), lr=LR_PATCH, weight_decay=1e-4)
sch = CosineAnnealingLR(opt, T_max=PATCH_EPOCHS)

for e in range(1, PATCH_EPOCHS+1):
    model.train()
    tot = 0
    for x,y in tqdm(patch_loader, desc=f"Patch {e}"):
        x,y = x.to(DEVICE), y.to(DEVICE)
        opt.zero_grad()
        with autocast():
            loss = dice(model(x),y) + 0.5*focal(model(x),y)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()
        tot += loss.item()
    sch.step()
    print(f"Patch Epoch {e:02d} | Loss {tot:.4f}")

# ============================================================
# PHASE B — FULL IMAGE FINETUNE (768)
# ============================================================
for p in model.encoder.parameters():
    p.requires_grad = False

opt = AdamW(
    [
        {"params": model.encoder.parameters(), "lr": LR_FULL*0.1},
        {"params": model.decoder.parameters(), "lr": LR_FULL},
    ],
    weight_decay=1e-4
)

sch = CosineAnnealingLR(opt, T_max=FULL_EPOCHS)

train_loader = DataLoader(
    FullDataset(df_train, full_train_tf),
    batch_size=FULL_BATCH,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

val_loader = DataLoader(
    FullDataset(df_val, full_val_tf),
    batch_size=FULL_BATCH,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

@torch.no_grad()
def validate():
    model.eval()
    best = 0
    for thr in THR_RANGE:
        scores=[]
        for x,y in val_loader:
            x,y = x.to(DEVICE), y.to(DEVICE)
            p = (torch.sigmoid(model(x))>thr).float()
            inter=(p*y).sum((2,3))
            union=p.sum((2,3))+y.sum((2,3))
            scores.append(((2*inter+1e-7)/(union+1e-7)).mean().item())
        best=max(best,np.mean(scores))
    return best

best=0
for e in range(1, FULL_EPOCHS+1):
    model.train()
    if e==FREEZE_EPOCHS+1:
        for p in model.encoder.parameters():
            p.requires_grad=True
        print("[INFO] Encoder unfrozen")

    opt.zero_grad()
    for i,(x,y) in enumerate(tqdm(train_loader, desc=f"Full {e}")):
        x,y=x.to(DEVICE),y.to(DEVICE)
        with autocast():
            loss=(dice(model(x),y)+0.5*focal(model(x),y))/ACCUM
        scaler.scale(loss).backward()
        if (i+1)%ACCUM==0:
            scaler.step(opt)
            scaler.update()
            opt.zero_grad()

    sch.step()
    vd=validate()
    print(f"Full Epoch {e:02d} | ValDice {vd:.4f}")
    if vd>best:
        best=vd
        torch.save(model.state_dict(),"/kaggle/working/unetpp_best.pt")
        print(">> Best saved")

print(f"\n[BEST VAL DICE] {best:.4f}")
print("[STAGE 3 DONE — 0.80 TRACK]")


  scaler = GradScaler()


Device: cuda
Train: 423 | Val: 75
Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b5-b6417697.pth


100%|██████████| 117M/117M [00:09<00:00, 13.0MB/s]
  with autocast():
Patch 1: 100%|██████████| 318/318 [03:28<00:00,  1.53it/s]


Patch Epoch 01 | Loss 162.1676


Patch 2: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 02 | Loss 121.7639


Patch 3: 100%|██████████| 318/318 [03:24<00:00,  1.56it/s]


Patch Epoch 03 | Loss 112.1714


Patch 4: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 04 | Loss 105.5874


Patch 5: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 05 | Loss 89.7590


Patch 6: 100%|██████████| 318/318 [03:24<00:00,  1.56it/s]


Patch Epoch 06 | Loss 82.1440


Patch 7: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 07 | Loss 81.1956


Patch 8: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 08 | Loss 79.7978


Patch 9: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 09 | Loss 75.4568


Patch 10: 100%|██████████| 318/318 [03:24<00:00,  1.56it/s]


Patch Epoch 10 | Loss 66.7382


Patch 11: 100%|██████████| 318/318 [03:25<00:00,  1.55it/s]


Patch Epoch 11 | Loss 64.2680


Patch 12: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 12 | Loss 63.7870


Patch 13: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 13 | Loss 52.3710


Patch 14: 100%|██████████| 318/318 [03:25<00:00,  1.55it/s]


Patch Epoch 14 | Loss 55.0952


Patch 15: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 15 | Loss 56.4241


Patch 16: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 16 | Loss 50.2196


Patch 17: 100%|██████████| 318/318 [03:24<00:00,  1.56it/s]


Patch Epoch 17 | Loss 48.6776


Patch 18: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 18 | Loss 45.7356


Patch 19: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 19 | Loss 45.8990


Patch 20: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 20 | Loss 44.2987


Patch 21: 100%|██████████| 318/318 [03:24<00:00,  1.56it/s]


Patch Epoch 21 | Loss 42.6991


Patch 22: 100%|██████████| 318/318 [03:24<00:00,  1.56it/s]


Patch Epoch 22 | Loss 42.4521


Patch 23: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 23 | Loss 43.3448


Patch 24: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 24 | Loss 38.7987


Patch 25: 100%|██████████| 318/318 [03:24<00:00,  1.55it/s]


Patch Epoch 25 | Loss 43.2907


  with autocast():
Full 1: 100%|██████████| 212/212 [01:57<00:00,  1.80it/s]


Full Epoch 01 | ValDice 0.7124
>> Best saved


Full 2: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 02 | ValDice 0.7159
>> Best saved


Full 3: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 03 | ValDice 0.7167
>> Best saved


Full 4: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 04 | ValDice 0.7113


Full 5: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 05 | ValDice 0.7175
>> Best saved


Full 6: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 06 | ValDice 0.7137


Full 7: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 07 | ValDice 0.7204
>> Best saved


Full 8: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 08 | ValDice 0.7199


Full 9: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 09 | ValDice 0.7145


Full 10: 100%|██████████| 212/212 [01:56<00:00,  1.82it/s]


Full Epoch 10 | ValDice 0.7205
>> Best saved
[INFO] Encoder unfrozen


Full 11: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 11 | ValDice 0.7224
>> Best saved


Full 12: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 12 | ValDice 0.7155


Full 13: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 13 | ValDice 0.7211


Full 14: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 14 | ValDice 0.7197


Full 15: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 15 | ValDice 0.7120


Full 16: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 16 | ValDice 0.7210


Full 17: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 17 | ValDice 0.7151


Full 18: 100%|██████████| 212/212 [04:14<00:00,  1.20s/it]


Full Epoch 18 | ValDice 0.7140

[BEST VAL DICE] 0.7224
[STAGE 3 DONE — 0.80 TRACK]


# Optimization, Validation & Refinement

In [5]:
# ============================================================
# STAGE 4 — Optimization & Refinement (FINAL · 0.80 SAFE)
# UNet++ + EfficientNet-B5 | 768-SPACE | Dice-correct
# ============================================================

!pip install -q optuna

import optuna
import numpy as np
import torch
import cv2
from tqdm import tqdm

import segmentation_models_pytorch as smp
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

# -----------------------------
# DEVICE
# -----------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# ============================================================
# VALIDATION SET (FROM STAGE 3)
# ============================================================
df_val = df_val.reset_index(drop=True)

# ============================================================
# DATASET
# ============================================================
class ValDataset(Dataset):
    def __init__(self, df, tf):
        self.df = df
        self.tf = tf

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img = cv2.imread(self.df.loc[idx, "image_path"])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.df.loc[idx, "mask_path"], cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype(np.uint8)

        aug = self.tf(image=img, mask=mask)
        return aug["image"], aug["mask"]

# ============================================================
# TRANSFORM — STRICT (768)
# ============================================================
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

VAL_SIZE = 768

val_tf = A.Compose([
    A.Resize(VAL_SIZE, VAL_SIZE),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])

val_loader = DataLoader(
    ValDataset(df_val, val_tf),
    batch_size=2,          # VRAM-safe
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# ============================================================
# LOAD MODEL (MATCH STAGE 3)
# ============================================================
model = smp.UnetPlusPlus(
    encoder_name="efficientnet-b5",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

model.load_state_dict(
    torch.load("/kaggle/working/unetpp_best.pt", map_location=DEVICE)
)
model.eval()

print("[INFO] UNet++ B5 loaded")

# ============================================================
# COMPUTE MIN_AREA RANGE (768-SPACE · PATCH-AWARE)
# ============================================================
areas = []

for p in df_val["mask_path"]:
    m = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
    m = (m == 255).astype(np.uint8)
    m = cv2.resize(m, (VAL_SIZE, VAL_SIZE), interpolation=cv2.INTER_NEAREST)

    n, _, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8)
    for i in range(1, n):
        areas.append(stats[i, cv2.CC_STAT_AREA])

areas = np.array(areas)

MIN_AREA_LO = int(np.percentile(areas, 8))
MIN_AREA_HI = int(np.percentile(areas, 30))

print(f"[INFO] min_area range (768): {MIN_AREA_LO} – {MIN_AREA_HI}")

# ============================================================
# DICE — CORRECT (EMPTY-SAFE)
# ============================================================
def dice_correct(pred, gt, eps=1e-7):
    if gt.sum() == 0 and pred.sum() == 0:
        return 1.0
    if gt.sum() == 0 and pred.sum() > 0:
        return 0.0
    inter = (pred * gt).sum()
    union = pred.sum() + gt.sum()
    return (2 * inter + eps) / (union + eps)

def remove_small(mask, min_area):
    n, labels, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8
    )
    out = np.zeros_like(mask)
    for i in range(1, n):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            out[labels == i] = 1
    return out

# ============================================================
# OPTUNA OBJECTIVE
# ============================================================
def objective(trial):
    thr = trial.suggest_float("threshold", 0.36, 0.52)
    min_area = trial.suggest_int(
        "min_area", MIN_AREA_LO, MIN_AREA_HI, step=20
    )

    scores = []

    with torch.no_grad():
        for imgs, masks in val_loader:
            imgs = imgs.to(DEVICE)
            probs = torch.sigmoid(model(imgs)).cpu().numpy()
            masks = masks.numpy()

            for i in range(len(probs)):
                p = (probs[i, 0] > thr).astype(np.uint8)
                p = remove_small(p, min_area)
                scores.append(dice_correct(p, masks[i]))

    return float(np.mean(scores))

# ============================================================
# RUN OPTUNA
# ============================================================
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

best = study.best_params

print("\n[OPTUNA BEST CONFIG — FINAL]")
for k, v in best.items():
    print(f"{k}: {v}")
print(f"Validation Dice: {study.best_value:.4f}")

# ============================================================
# EXPORT CONFIG
# ============================================================
OPT_CONFIG = {
    "weights": {"unetpp": 1.0},
    "threshold": best["threshold"],
    "min_area": best["min_area"],
    "val_size": VAL_SIZE,
}

print("\n[STAGE 4 COMPLETE — 0.80 TRACK]")


Device: cuda
[INFO] UNet++ B5 loaded


[I 2026-02-09 14:42:56,734] A new study created in memory with name: no-name-16deb1bc-12b5-41e9-81ac-886fa52fa094


[INFO] min_area range (768): 63 – 278


  0%|          | 0/30 [00:00<?, ?it/s]



[I 2026-02-09 14:43:02,464] Trial 0 finished with value: 0.7181261204244572 and parameters: {'threshold': 0.38461919296619707, 'min_area': 163}. Best is trial 0 with value: 0.7181261204244572.
[I 2026-02-09 14:43:08,153] Trial 1 finished with value: 0.7161471910682126 and parameters: {'threshold': 0.385768073452576, 'min_area': 243}. Best is trial 0 with value: 0.7181261204244572.
[I 2026-02-09 14:43:13,823] Trial 2 finished with value: 0.7210021154742865 and parameters: {'threshold': 0.46305068596871246, 'min_area': 83}. Best is trial 2 with value: 0.7210021154742865.
[I 2026-02-09 14:43:19,492] Trial 3 finished with value: 0.719751547902066 and parameters: {'threshold': 0.4564904856355993, 'min_area': 163}. Best is trial 2 with value: 0.7210021154742865.
[I 2026-02-09 14:43:25,182] Trial 4 finished with value: 0.7189203091299673 and parameters: {'threshold': 0.3948085422506887, 'min_area': 123}. Best is trial 2 with value: 0.7210021154742865.
[I 2026-02-09 14:43:30,920] Trial 5 finis

# Inference, Encoding & Submission

In [6]:
# ============================================================
# STAGE 5 — FINAL MULTI-SCALE INFERENCE & SUBMISSION (0.80+)
# UNet++ EffNet-B5 | 512 + 768 | LB-SAFE
# ============================================================

import numpy as np
import pandas as pd
import torch
import cv2
from pathlib import Path
from tqdm import tqdm

import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TEST_IMG_DIR = DATA_ROOT / "test/images"
SAMPLE_SUB = Path("/kaggle/input/data-science-ara-7-0/sample_submission.csv")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

THR = OPT_CONFIG["threshold"]
MIN_AREA_768 = OPT_CONFIG["min_area"]

SCALES = [512, 768]

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# -----------------------------
# LOAD MODEL (MATCH STAGE 3)
# -----------------------------
model = smp.UnetPlusPlus(
    encoder_name="efficientnet-b5",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

model.load_state_dict(
    torch.load("/kaggle/working/unetpp_best.pt", map_location=DEVICE)
)
model.eval()

print("[INFO] UNet++ B5 loaded")

# -----------------------------
# TRANSFORM FACTORY
# -----------------------------
def build_tf(sz):
    return A.Compose([
        A.Resize(sz, sz),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ])

# -----------------------------
# RLE ENCODER
# -----------------------------
def encode_rle(mask):
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[0::2]
    return " ".join(map(str, runs))

# -----------------------------
# POSTPROCESS
# -----------------------------
def remove_small(mask, min_area):
    n, labels, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8
    )
    out = np.zeros_like(mask)
    for i in range(1, n):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            out[labels == i] = 1
    return out

# -----------------------------
# INFERENCE
# -----------------------------
records = []
test_images = sorted(TEST_IMG_DIR.glob("*.jpg"))

with torch.no_grad():
    for img_path in tqdm(test_images, desc="Multi-scale inference"):
        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h0, w0 = img.shape[:2]

        probs_all = []

        for sz in SCALES:
            tf = build_tf(sz)
            x = tf(image=img)["image"].unsqueeze(0).to(DEVICE)
            x_f = torch.flip(x, dims=[3])

            p = torch.sigmoid(model(x))
            p_f = torch.flip(torch.sigmoid(model(x_f)), dims=[3])
            prob = ((p + p_f) / 2.0)[0, 0].cpu().numpy()

            prob = cv2.resize(prob, (768, 768))
            probs_all.append(prob)

        # ---- average multi-scale ----
        prob_768 = np.mean(probs_all, axis=0)

        # ---- global confidence guard ----
        if prob_768.max() < THR * 0.85:
            pred_768 = np.zeros((768, 768), dtype=np.uint8)
        else:
            pred_768 = (prob_768 > THR).astype(np.uint8)
            pred_768 = remove_small(pred_768, MIN_AREA_768)

        # ---- resize to original ----
        pred = cv2.resize(
            pred_768, (w0, h0), interpolation=cv2.INTER_NEAREST
        )

        rle = "" if pred.sum() == 0 else encode_rle(pred)

        records.append({
            "ImageId": img_path.name,
            "rle": rle
        })

# -----------------------------
# SUBMISSION
# -----------------------------
df_sub = pd.DataFrame(records)
df_sample = pd.read_csv(SAMPLE_SUB)
df_sub = df_sub[df_sample.columns.tolist()]

OUT_SUB = "/kaggle/working/submission.csv"
df_sub.to_csv(OUT_SUB, index=False)

print("\n[STAGE 5 COMPLETE — MULTI-SCALE SUBMISSION READY]")
print("Saved to:", OUT_SUB)
print("Total:", len(df_sub))
print("Empty RLE:", (df_sub['rle'] == '').sum())
print(df_sub.head())


[INFO] UNet++ B5 loaded


Multi-scale inference: 100%|██████████| 295/295 [01:14<00:00,  3.98it/s]


[STAGE 5 COMPLETE — MULTI-SCALE SUBMISSION READY]
Saved to: /kaggle/working/submission.csv
Total: 295
Empty RLE: 2
        ImageId                                                rle
0  test_001.jpg  4343 2 4642 4 4941 6 5241 6 5540 7 5839 9 6139...
1  test_002.jpg  69324 3 70039 2 70042 6 70758 10 71477 12 7219...
2  test_003.jpg  576283 9 578579 9 580875 9 583171 9 585467 9 5...
3  test_004.jpg                                                   
4  test_005.jpg  48714 1 49013 3 49313 5 49612 12 49912 16 4993...



