# Data Understanding & Preparation

In [1]:
# ============================================================
# STAGE 1 — Data Understanding & Preparation (REVISED · LB-READY)
# Purpose:
# - Validate dataset integrity
# - Quantify Dice risk factors (empty / tiny / fragmented)
# - Derive data-driven priors for:
#   • sampling strategy
#   • min-area postprocess
#   • threshold sweep
# - Produce manifest for downstream stages
# ============================================================

from pathlib import Path
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm
import re

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TRAIN_IMG_DIR = DATA_ROOT / "train/images"
TRAIN_MASK_DIR = DATA_ROOT / "train/mask"
TEST_IMG_DIR  = DATA_ROOT / "test/images"

IMG_EXTS = {".jpg", ".jpeg", ".png"}

# -----------------------------
# 1. LOAD FILES
# -----------------------------
train_images = sorted([p for p in TRAIN_IMG_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])
train_masks  = sorted([p for p in TRAIN_MASK_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])
test_images  = sorted([p for p in TEST_IMG_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])

print(f"[INFO] Train images : {len(train_images)}")
print(f"[INFO] Train masks  : {len(train_masks)}")
print(f"[INFO] Test images  : {len(test_images)}")

# -----------------------------
# 2. INDEX MASKS
# -----------------------------
def extract_index(name: str):
    m = re.search(r"(\d+)", name)
    return m.group(1) if m else None

mask_index = {extract_index(m.stem): m for m in train_masks if extract_index(m.stem)}

# -----------------------------
# 3. PAIR IMAGE–MASK
# -----------------------------
pairs = []
for img in train_images:
    idx = extract_index(img.stem)
    if idx in mask_index:
        pairs.append({
            "image_path": img,
            "mask_path": mask_index[idx],
            "id": idx
        })

assert len(pairs) > 0
print(f"[INFO] Valid image-mask pairs: {len(pairs)}")

# -----------------------------
# 4. MORPHOLOGY & DICE-RISK ANALYSIS
# -----------------------------
records = []
all_component_areas = []

for p in tqdm(pairs, desc="Analyzing dataset"):
    mask = cv2.imread(str(p["mask_path"]), cv2.IMREAD_GRAYSCALE)
    h, w = mask.shape
    total_pixels = h * w

    bin_mask = (mask == 255).astype(np.uint8)
    pothole_pixels = bin_mask.sum()
    area_ratio = pothole_pixels / total_pixels

    num_labels, _, stats, _ = cv2.connectedComponentsWithStats(
        bin_mask, connectivity=8
    )

    component_areas = stats[1:, cv2.CC_STAT_AREA] if num_labels > 1 else []
    if len(component_areas) > 0:
        all_component_areas.extend(component_areas.tolist())

    # bucket for stratified sampling
    if pothole_pixels == 0:
        bucket = "empty"
    elif area_ratio < 0.002:
        bucket = "tiny"
    elif area_ratio < 0.01:
        bucket = "small"
    elif area_ratio < 0.05:
        bucket = "medium"
    else:
        bucket = "large"

    records.append({
        "image": p["image_path"].name,
        "image_path": str(p["image_path"]),
        "mask_path": str(p["mask_path"]),
        "height": h,
        "width": w,
        "has_pothole": int(pothole_pixels > 0),
        "area_ratio": area_ratio,
        "total_pothole_pixels": pothole_pixels,
        "num_components": len(component_areas),
        "max_component_pixels": component_areas.max() if len(component_areas) > 0 else 0,
        "bucket": bucket,
    })

df = pd.DataFrame(records)

# -----------------------------
# 5. CORE INSIGHTS
# -----------------------------
print("\n[INSIGHT] Pothole presence:")
print(df["has_pothole"].value_counts())

print("\n[INSIGHT] Area ratio (% image):")
print(df["area_ratio"].describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]))

print("\n[INSIGHT] Bucket distribution:")
print(df["bucket"].value_counts(normalize=True).round(3))

# -----------------------------
# 6. SMALL OBJECT ANALYSIS (POSTPROCESS PRIOR)
# -----------------------------
comp_series = pd.Series(all_component_areas)

print("\n[INSIGHT] Connected component area (px):")
print(comp_series.describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]))

MIN_AREA_PX = int(comp_series.quantile(0.10))
print(f"\n[LOCKED PRIOR] MIN_AREA_PX ≈ {MIN_AREA_PX}")

# -----------------------------
# 7. DICE FEASIBILITY SIGNAL
# -----------------------------
tiny_ratio = (df["area_ratio"] < 0.01).mean()
print(f"\n[FEASIBILITY] <1% area images: {tiny_ratio:.2%}")

if tiny_ratio > 0.6:
    feasibility = "HARD"
elif tiny_ratio > 0.4:
    feasibility = "MODERATE"
else:
    feasibility = "FAVORABLE"

print(f"[FEASIBILITY STATUS] {feasibility}")

# -----------------------------
# 8. THRESHOLD PRIOR
# -----------------------------
THR_START, THR_END = 0.30, 0.45
print("\n[LOCKED THRESHOLD PRIOR]")
print(f"Use sweep range: {THR_START:.2f} – {THR_END:.2f}")

# -----------------------------
# 9. FINAL MANIFEST (DOWNSTREAM READY)
# -----------------------------
df_manifest = df[[
    "image_path",
    "mask_path",
    "has_pothole",
    "area_ratio",
    "bucket"
]].copy()

print(f"\n[INFO] Final training samples: {len(df_manifest)}")

print("\n[STAGE 1 COMPLETE — LB-READY]")
print("✓ Dataset validated")
print("✓ Sampling buckets defined")
print("✓ Min-area & threshold locked")
print("✓ Manifest ready for STAGE 2/3")


[INFO] Train images : 498
[INFO] Train masks  : 498
[INFO] Test images  : 295
[INFO] Valid image-mask pairs: 498


Analyzing dataset: 100%|██████████| 498/498 [00:11<00:00, 41.92it/s]


[INSIGHT] Pothole presence:
has_pothole
1    498
Name: count, dtype: int64

[INSIGHT] Area ratio (% image):
count    498.000000
mean       0.134860
std        0.128772
min        0.000235
10%        0.007938
25%        0.040943
50%        0.091678
75%        0.193834
90%        0.329536
max        0.674005
Name: area_ratio, dtype: float64

[INSIGHT] Bucket distribution:
bucket
large     0.691
medium    0.191
small     0.066
tiny      0.052
Name: proportion, dtype: float64

[INSIGHT] Connected component area (px):
count    2.122000e+03
mean     5.588544e+04
std      3.030841e+05
min      1.000000e+00
10%      1.301000e+02
25%      3.930000e+02
50%      1.913000e+03
75%      1.203275e+04
90%      5.370160e+04
max      6.700584e+06
dtype: float64

[LOCKED PRIOR] MIN_AREA_PX ≈ 130

[FEASIBILITY] <1% area images: 11.85%
[FEASIBILITY STATUS] FAVORABLE

[LOCKED THRESHOLD PRIOR]
Use sweep range: 0.30 – 0.45

[INFO] Final training samples: 498

[STAGE 1 COMPLETE — LB-READY]
✓ Dataset validated




# Preprocessing & Data Augmentation

In [2]:
# ============================================================
# STAGE 2 — Preprocessing & Data Augmentation (FINAL · 0.80+)
# ============================================================

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# ============================================================
# TRAIN AUGMENTATION — MASK-AWARE & SHAPE-ROBUST
# ============================================================
train_transform_512 = A.Compose(
    [
        # --- FIXED RESOLUTION ---
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),

        # --- MASK-AWARE SPATIAL FOCUS (CRITICAL) ---
        A.CropNonEmptyMaskIfExists(
            height=448,
            width=448,
            p=0.40,
        ),
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),

        # --- SAFE GEOMETRY ---
        A.HorizontalFlip(p=0.5),

        A.Affine(
            scale=(0.97, 1.05),
            translate_percent=(0.0, 0.03),
            rotate=(-2.5, 2.5),
            shear=(-1.5, 1.5),
            interpolation=cv2.INTER_LINEAR,
            mode=cv2.BORDER_REFLECT_101,
            p=0.30,
        ),

        # --- SHAPE DEFORMATION (KEY FOR 0.80) ---
        A.ElasticTransform(
            alpha=20,
            sigma=6,
            alpha_affine=4,
            border_mode=cv2.BORDER_REFLECT_101,
            p=0.25,
        ),

        # --- PHOTOMETRIC ---
        A.RandomBrightnessContrast(
            brightness_limit=0.18,
            contrast_limit=0.18,
            p=0.65,
        ),

        A.HueSaturationValue(
            hue_shift_limit=5,
            sat_shift_limit=10,
            val_shift_limit=5,
            p=0.30,
        ),

        # --- SHADOW (CONSERVATIVE) ---
        A.RandomShadow(
            shadow_roi=(0, 0.6, 1, 1),
            num_shadows_lower=1,
            num_shadows_upper=1,
            shadow_dimension=4,
            p=0.15,
        ),

        # --- VERY MILD NOISE ---
        A.OneOf(
            [
                A.GaussianBlur(blur_limit=3),
                A.GaussNoise(var_limit=(4.0, 12.0)),
            ],
            p=0.12,
        ),

        # --- NORMALIZE ---
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

# ============================================================
# VALID / TEST (STRICT)
# ============================================================
valid_transform = A.Compose(
    [
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

test_transform = A.Compose(
    [
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ]
)

print("[STAGE 2 FINAL — SHAPE-AWARE & 0.80-READY]")


[STAGE 2 FINAL — SHAPE-AWARE & 0.80-READY]


  A.Affine(
  A.ElasticTransform(
  A.RandomShadow(
  A.GaussNoise(var_limit=(4.0, 12.0)),


# Model Construction & Training

In [3]:
!pip install -q segmentation-models-pytorch==0.3.3 timm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone


In [4]:
# ============================================================
# STAGE 3 — PATCH + FULL IMAGE TRAINING (FINAL · ONE CELL)
# UNet++ EffNet-B4 | SELF-CONTAINED | LB-SAFE
# ============================================================

import random, re, cv2
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR

import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split

# -----------------------------
# CONFIG
# -----------------------------
SEED = 42
IMG_SIZE = 512
PATCH = 320

PATCH_EPOCHS = 25
FULL_EPOCHS  = 18

PATCH_BATCH = 6
FULL_BATCH  = 4
ACCUM = 2

LR_PATCH = 3e-4
LR_FULL  = 1e-5

FREEZE_EPOCHS = 6
THR_RANGE = np.linspace(0.35, 0.50, 7)

DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
IMG_DIR = DATA_ROOT / "train/images"
MSK_DIR = DATA_ROOT / "train/mask"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

print("Device:", DEVICE)

# ============================================================
# BUILD MANIFEST
# ============================================================
def extract_idx(name):
    return re.search(r"(\d+)", name).group(1)

pairs = []
for img in IMG_DIR.iterdir():
    idx = extract_idx(img.name)
    msk = MSK_DIR / f"mask_{idx}.png"
    if msk.exists():
        pairs.append((str(img), str(msk)))

df = pd.DataFrame(pairs, columns=["image_path", "mask_path"])
df_train, df_val = train_test_split(
    df, test_size=0.15, random_state=SEED, shuffle=True
)

print("Train:", len(df_train), "| Val:", len(df_val))

# ============================================================
# DATASETS
# ============================================================
class PatchDataset(Dataset):
    def __init__(self, df, tf):
        self.df = df.reset_index(drop=True)
        self.tf = tf

    def __len__(self):
        return len(self.df) * 3

    def _safe_patch(self, img, mask, x1, y1):
        h, w = img.shape[:2]
        x1 = max(0, min(x1, w - PATCH))
        y1 = max(0, min(y1, h - PATCH))
        img_c = img[y1:y1+PATCH, x1:x1+PATCH]
        mask_c = mask[y1:y1+PATCH, x1:x1+PATCH]

        if img_c.shape[:2] != (PATCH, PATCH):
            img_c = cv2.resize(img, (PATCH, PATCH))
            mask_c = cv2.resize(mask, (PATCH, PATCH), interpolation=cv2.INTER_NEAREST)

        return img_c, mask_c

    def __getitem__(self, idx):
        row = self.df.iloc[idx % len(self.df)]
        img = cv2.imread(row.image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(row.mask_path, cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype(np.uint8)

        if mask.sum() > 0 and random.random() < 0.7:
            ys, xs = np.where(mask > 0)
            i = random.randint(0, len(xs)-1)
            img, mask = self._safe_patch(
                img, mask,
                xs[i] - PATCH//2,
                ys[i] - PATCH//2
            )
        else:
            h, w = img.shape[:2]
            if h >= PATCH and w >= PATCH:
                x1 = random.randint(0, w - PATCH)
                y1 = random.randint(0, h - PATCH)
                img, mask = self._safe_patch(img, mask, x1, y1)
            else:
                img = cv2.resize(img, (PATCH, PATCH))
                mask = cv2.resize(mask, (PATCH, PATCH), interpolation=cv2.INTER_NEAREST)

        aug = self.tf(image=img, mask=mask)
        return aug["image"], aug["mask"].unsqueeze(0).float()


class FullDataset(Dataset):
    def __init__(self, df, tf):
        self.df = df.reset_index(drop=True)
        self.tf = tf

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = cv2.imread(row.image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(row.mask_path, cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype(np.float32)
        aug = self.tf(image=img, mask=mask)
        return aug["image"], aug["mask"].unsqueeze(0)

# ============================================================
# TRANSFORMS
# ============================================================
norm = dict(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225))

patch_tf = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(0.15, 0.15, p=0.5),
    A.Normalize(**norm),
    ToTensorV2(),
])

full_train_tf = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.HorizontalFlip(p=0.5),
    A.Normalize(**norm),
    ToTensorV2(),
])

full_val_tf = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(**norm),
    ToTensorV2(),
])

# ============================================================
# MODEL & LOSS
# ============================================================
model = smp.UnetPlusPlus(
    encoder_name="efficientnet-b4",
    encoder_weights="imagenet",
    in_channels=3,
    classes=1,
).to(DEVICE)

dice = smp.losses.DiceLoss(mode="binary", from_logits=True)
focal = smp.losses.FocalLoss(mode="binary", gamma=2.0)

# ============================================================
# PHASE A — PATCH TRAINING
# ============================================================
patch_loader = DataLoader(
    PatchDataset(df_train, patch_tf),
    batch_size=PATCH_BATCH,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

opt = AdamW(model.parameters(), lr=LR_PATCH, weight_decay=1e-4)
sch = CosineAnnealingLR(opt, T_max=PATCH_EPOCHS)

for e in range(1, PATCH_EPOCHS+1):
    model.train()
    tot = 0
    for x,y in tqdm(patch_loader, desc=f"Patch {e}"):
        x,y = x.to(DEVICE), y.to(DEVICE)
        opt.zero_grad()
        loss = dice(model(x),y) + 0.5*focal(model(x),y)
        loss.backward()
        opt.step()
        tot += loss.item()
    sch.step()
    print(f"Patch Epoch {e:02d} | Loss {tot:.4f}")

# ============================================================
# PHASE B — FULL FINETUNE
# ============================================================
for p in model.encoder.parameters():
    p.requires_grad = False

opt = AdamW([
    {"params": model.encoder.parameters(), "lr": LR_FULL*0.1},
    {"params": model.decoder.parameters(), "lr": LR_FULL},
], weight_decay=1e-4)

sch = CosineAnnealingLR(opt, T_max=FULL_EPOCHS)

train_loader = DataLoader(
    FullDataset(df_train, full_train_tf),
    batch_size=FULL_BATCH,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)

val_loader = DataLoader(
    FullDataset(df_val, full_val_tf),
    batch_size=FULL_BATCH,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

@torch.no_grad()
def validate():
    model.eval()
    best = 0
    for thr in THR_RANGE:
        scores=[]
        for x,y in val_loader:
            x,y = x.to(DEVICE), y.to(DEVICE)
            p = (torch.sigmoid(model(x))>thr).float()
            inter=(p*y).sum((2,3))
            union=p.sum((2,3))+y.sum((2,3))
            scores.append(((2*inter+1e-7)/(union+1e-7)).mean().item())
        best=max(best,np.mean(scores))
    return best

best=0
for e in range(1, FULL_EPOCHS+1):
    model.train()
    if e==FREEZE_EPOCHS+1:
        for p in model.encoder.parameters(): p.requires_grad=True
        print("[INFO] Encoder unfrozen")

    opt.zero_grad()
    for i,(x,y) in enumerate(tqdm(train_loader, desc=f"Full {e}")):
        x,y=x.to(DEVICE),y.to(DEVICE)
        loss=(dice(model(x),y)+0.5*focal(model(x),y))/ACCUM
        loss.backward()
        if (i+1)%ACCUM==0:
            opt.step(); opt.zero_grad()

    sch.step()
    vd=validate()
    print(f"Full Epoch {e:02d} | ValDice {vd:.4f}")
    if vd>best:
        best=vd
        torch.save(model.state_dict(),"/kaggle/working/unetpp_best.pt")
        print(">> Best saved")

print(f"\n[BEST VAL DICE] {best:.4f}")
print("[STAGE 3 DONE — STABLE & READY]")




Device: cuda
Train: 423 | Val: 75
Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b4-6ed6700e.pth


100%|██████████| 74.4M/74.4M [00:00<00:00, 155MB/s] 
Patch 1: 100%|██████████| 212/212 [02:51<00:00,  1.23it/s]


Patch Epoch 01 | Loss 107.0744


Patch 2: 100%|██████████| 212/212 [02:57<00:00,  1.19it/s]


Patch Epoch 02 | Loss 82.7072


Patch 3: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 03 | Loss 70.3081


Patch 4: 100%|██████████| 212/212 [02:57<00:00,  1.19it/s]


Patch Epoch 04 | Loss 65.3861


Patch 5: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 05 | Loss 57.6771


Patch 6: 100%|██████████| 212/212 [02:56<00:00,  1.20it/s]


Patch Epoch 06 | Loss 59.2744


Patch 7: 100%|██████████| 212/212 [02:57<00:00,  1.19it/s]


Patch Epoch 07 | Loss 53.1973


Patch 8: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 08 | Loss 49.9093


Patch 9: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 09 | Loss 42.8931


Patch 10: 100%|██████████| 212/212 [02:56<00:00,  1.20it/s]


Patch Epoch 10 | Loss 47.1722


Patch 11: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 11 | Loss 45.2382


Patch 12: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 12 | Loss 41.1487


Patch 13: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 13 | Loss 38.3622


Patch 14: 100%|██████████| 212/212 [02:56<00:00,  1.20it/s]


Patch Epoch 14 | Loss 37.5250


Patch 15: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 15 | Loss 35.2285


Patch 16: 100%|██████████| 212/212 [02:57<00:00,  1.19it/s]


Patch Epoch 16 | Loss 35.8676


Patch 17: 100%|██████████| 212/212 [02:57<00:00,  1.20it/s]


Patch Epoch 17 | Loss 31.5309


Patch 18: 100%|██████████| 212/212 [02:58<00:00,  1.19it/s]


Patch Epoch 18 | Loss 32.5686


Patch 19: 100%|██████████| 212/212 [02:58<00:00,  1.19it/s]


Patch Epoch 19 | Loss 31.4118


Patch 20: 100%|██████████| 212/212 [02:58<00:00,  1.19it/s]


Patch Epoch 20 | Loss 31.1632


Patch 21: 100%|██████████| 212/212 [02:57<00:00,  1.19it/s]


Patch Epoch 21 | Loss 29.2109


Patch 22: 100%|██████████| 212/212 [02:58<00:00,  1.19it/s]


Patch Epoch 22 | Loss 30.6353


Patch 23: 100%|██████████| 212/212 [02:58<00:00,  1.19it/s]


Patch Epoch 23 | Loss 29.9359


Patch 24: 100%|██████████| 212/212 [02:57<00:00,  1.19it/s]


Patch Epoch 24 | Loss 28.0355


Patch 25: 100%|██████████| 212/212 [02:58<00:00,  1.19it/s]


Patch Epoch 25 | Loss 27.3739


Full 1: 100%|██████████| 106/106 [01:24<00:00,  1.25it/s]


Full Epoch 01 | ValDice 0.7318
>> Best saved


Full 2: 100%|██████████| 106/106 [01:24<00:00,  1.25it/s]


Full Epoch 02 | ValDice 0.7316


Full 3: 100%|██████████| 106/106 [01:24<00:00,  1.25it/s]


Full Epoch 03 | ValDice 0.7319
>> Best saved


Full 4: 100%|██████████| 106/106 [01:24<00:00,  1.25it/s]


Full Epoch 04 | ValDice 0.7327
>> Best saved


Full 5: 100%|██████████| 106/106 [01:24<00:00,  1.25it/s]


Full Epoch 05 | ValDice 0.7338
>> Best saved


Full 6: 100%|██████████| 106/106 [01:24<00:00,  1.25it/s]


Full Epoch 06 | ValDice 0.7319
[INFO] Encoder unfrozen


Full 7: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 07 | ValDice 0.7290


Full 8: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 08 | ValDice 0.7318


Full 9: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 09 | ValDice 0.7317


Full 10: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 10 | ValDice 0.7315


Full 11: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 11 | ValDice 0.7321


Full 12: 100%|██████████| 106/106 [02:34<00:00,  1.46s/it]


Full Epoch 12 | ValDice 0.7314


Full 13: 100%|██████████| 106/106 [02:34<00:00,  1.45s/it]


Full Epoch 13 | ValDice 0.7336


Full 14: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 14 | ValDice 0.7337


Full 15: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 15 | ValDice 0.7321


Full 16: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 16 | ValDice 0.7333


Full 17: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 17 | ValDice 0.7307


Full 18: 100%|██████████| 106/106 [02:33<00:00,  1.45s/it]


Full Epoch 18 | ValDice 0.7311

[BEST VAL DICE] 0.7338
[STAGE 3 DONE — STABLE & READY]


# Optimization, Validation & Refinement

In [5]:
# ============================================================
# STAGE 4 — Optimization & Refinement (FINAL · 0.80 SAFE)
# UNet++ ONLY | Patch-aware | Dice-correct
# ============================================================

!pip install -q optuna

import optuna
import numpy as np
import torch
import cv2
from tqdm import tqdm

import segmentation_models_pytorch as smp
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

# -----------------------------
# DEVICE
# -----------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# ============================================================
# VALIDATION SET (FROM STAGE 3)
# ============================================================
df_val = df_val.reset_index(drop=True)

# ============================================================
# DATASET
# ============================================================
class ValDataset(Dataset):
    def __init__(self, df, tf):
        self.df = df
        self.tf = tf

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img = cv2.imread(self.df.loc[idx, "image_path"])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.df.loc[idx, "mask_path"], cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype(np.uint8)
        aug = self.tf(image=img, mask=mask)
        return aug["image"], aug["mask"]

# ============================================================
# TRANSFORM (STRICT)
# ============================================================
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

val_tf = A.Compose([
    A.Resize(512, 512),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])

val_loader = DataLoader(
    ValDataset(df_val, val_tf),
    batch_size=4,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# ============================================================
# LOAD MODEL
# ============================================================
model = smp.UnetPlusPlus(
    encoder_name="efficientnet-b4",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

model.load_state_dict(torch.load("/kaggle/working/unetpp_best.pt", map_location=DEVICE))
model.eval()

print("[INFO] UNet++ loaded")

# ============================================================
# COMPUTE MIN_AREA RANGE (PATCH-AWARE)
# ============================================================
areas = []
for p in df_val["mask_path"]:
    m = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
    m = (m == 255).astype(np.uint8)
    n, _, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8)
    for i in range(1, n):
        areas.append(stats[i, cv2.CC_STAT_AREA])

areas = np.array(areas)

MIN_AREA_LO = int(np.percentile(areas, 10))
MIN_AREA_HI = int(np.percentile(areas, 35))

print(f"[INFO] min_area range: {MIN_AREA_LO} – {MIN_AREA_HI}")

# ============================================================
# DICE (CORRECT & FAIR)
# ============================================================
def dice_correct(pred, gt, eps=1e-7):
    if gt.sum() == 0 and pred.sum() == 0:
        return 1.0
    if gt.sum() == 0 and pred.sum() > 0:
        return 0.0
    inter = (pred * gt).sum()
    union = pred.sum() + gt.sum()
    return (2 * inter + eps) / (union + eps)

def remove_small(mask, min_area):
    n, labels, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8
    )
    out = np.zeros_like(mask)
    for i in range(1, n):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            out[labels == i] = 1
    return out

# ============================================================
# OPTUNA OBJECTIVE
# ============================================================
def objective(trial):
    thr = trial.suggest_float("threshold", 0.38, 0.55)
    min_area = trial.suggest_int("min_area", MIN_AREA_LO, MIN_AREA_HI, step=10)

    scores = []

    with torch.no_grad():
        for imgs, masks in val_loader:
            imgs = imgs.to(DEVICE)
            probs = torch.sigmoid(model(imgs)).cpu().numpy()
            masks = masks.numpy()

            for i in range(len(probs)):
                p = (probs[i, 0] > thr).astype(np.uint8)
                p = remove_small(p, min_area)
                scores.append(dice_correct(p, masks[i]))

    return float(np.mean(scores))

# ============================================================
# RUN OPTUNA
# ============================================================
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, show_progress_bar=True)

best = study.best_params

print("\n[OPTUNA BEST CONFIG — FINAL]")
for k, v in best.items():
    print(f"{k}: {v}")
print(f"Validation Dice: {study.best_value:.4f}")

# ============================================================
# EXPORT CONFIG
# ============================================================
OPT_CONFIG = {
    "weights": {"unetpp": 1.0},
    "threshold": best["threshold"],
    "min_area": best["min_area"],
}

print("\n[STAGE 4 COMPLETE — 0.80 READY]")


Device: cuda
[INFO] UNet++ loaded


[I 2026-02-08 20:04:19,338] A new study created in memory with name: no-name-2570321e-40c0-48b4-bf8a-5cb9e109a493


[INFO] min_area range: 118 – 1066


  0%|          | 0/30 [00:00<?, ?it/s]



[I 2026-02-08 20:04:23,461] Trial 0 finished with value: 0.6860239981126488 and parameters: {'threshold': 0.5422901917669363, 'min_area': 708}. Best is trial 0 with value: 0.6860239981126488.
[I 2026-02-08 20:04:27,536] Trial 1 finished with value: 0.7215595527199858 and parameters: {'threshold': 0.39535946824258306, 'min_area': 138}. Best is trial 1 with value: 0.7215595527199858.
[I 2026-02-08 20:04:31,627] Trial 2 finished with value: 0.6839207068951452 and parameters: {'threshold': 0.5314836935548741, 'min_area': 888}. Best is trial 1 with value: 0.7215595527199858.
[I 2026-02-08 20:04:35,738] Trial 3 finished with value: 0.6919621660962322 and parameters: {'threshold': 0.5361355428490915, 'min_area': 468}. Best is trial 1 with value: 0.7215595527199858.
[I 2026-02-08 20:04:39,885] Trial 4 finished with value: 0.7082637628016039 and parameters: {'threshold': 0.4378812197618113, 'min_area': 268}. Best is trial 1 with value: 0.7215595527199858.
[I 2026-02-08 20:04:44,064] Trial 5 fin

# Inference, Encoding & Submission

In [6]:
# ============================================================
# STAGE 5 — FINAL INFERENCE, RLE & SUBMISSION (0.80 SAFE)
# UNet++ ONLY | Patch-aware | Correct postprocess
# ============================================================

import numpy as np
import pandas as pd
import torch
import cv2
from pathlib import Path
from tqdm import tqdm

import segmentation_models_pytorch as smp
import albumentations as A
from albumentations.pytorch import ToTensorV2

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TEST_IMG_DIR = DATA_ROOT / "test/images"
SAMPLE_SUB = Path("/kaggle/input/data-science-ara-7-0/sample_submission.csv")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

THRESHOLD = OPT_CONFIG["threshold"]
MIN_AREA_512 = OPT_CONFIG["min_area"]

IMG_SIZE = 512

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# -----------------------------
# LOAD MODEL
# -----------------------------
model = smp.UnetPlusPlus(
    encoder_name="efficientnet-b4",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

model.load_state_dict(torch.load("/kaggle/working/unetpp_best.pt", map_location=DEVICE))
model.eval()

print("[INFO] UNet++ loaded")

# -----------------------------
# TRANSFORM (512 ONLY)
# -----------------------------
infer_tf = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])

# -----------------------------
# RLE ENCODER (OFFICIAL)
# -----------------------------
def encode_rle(mask: np.ndarray) -> str:
    pixels = mask.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[0::2]
    return " ".join(map(str, runs))

# -----------------------------
# POSTPROCESS (512 SPACE ONLY)
# -----------------------------
def remove_small(mask, min_area):
    n, labels, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8
    )
    out = np.zeros_like(mask, dtype=np.uint8)
    for i in range(1, n):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            out[labels == i] = 1
    return out

# -----------------------------
# INFERENCE
# -----------------------------
records = []
test_images = sorted(TEST_IMG_DIR.glob("*.jpg"))

with torch.no_grad():
    for img_path in tqdm(test_images, desc="Final Inference"):
        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h0, w0 = img.shape[:2]

        x = infer_tf(image=img)["image"].unsqueeze(0).to(DEVICE)
        x_f = torch.flip(x, dims=[3])

        # ---- forward + HFlip TTA ----
        p  = torch.sigmoid(model(x))
        p_f = torch.flip(torch.sigmoid(model(x_f)), dims=[3])
        prob_512 = ((p + p_f) / 2.0)[0, 0].cpu().numpy()

        # ---- GLOBAL LOW-CONFIDENCE GUARD ----
        if prob_512.max() < THRESHOLD * 0.85:
            pred_512 = np.zeros((IMG_SIZE, IMG_SIZE), dtype=np.uint8)
        else:
            pred_512 = (prob_512 > THRESHOLD).astype(np.uint8)
            pred_512 = remove_small(pred_512, MIN_AREA_512)

        # ---- resize to original ----
        pred = cv2.resize(
            pred_512, (w0, h0), interpolation=cv2.INTER_NEAREST
        )

        rle = "" if pred.sum() == 0 else encode_rle(pred)

        records.append({
            "ImageId": img_path.name,
            "rle": rle
        })

# -----------------------------
# SUBMISSION
# -----------------------------
df_sub = pd.DataFrame(records)
df_sample = pd.read_csv(SAMPLE_SUB)
df_sub = df_sub[df_sample.columns.tolist()]

OUT_SUB = "/kaggle/working/submission.csv"
df_sub.to_csv(OUT_SUB, index=False)

print("\n[STAGE 5 COMPLETE — 0.80 READY SUBMISSION]")
print("Saved to:", OUT_SUB)
print("Total:", len(df_sub))
print("Empty RLE:", (df_sub['rle'] == '').sum())
print(df_sub.head())


[INFO] UNet++ loaded


Final Inference: 100%|██████████| 295/295 [00:34<00:00,  8.57it/s]


[STAGE 5 COMPLETE — 0.80 READY SUBMISSION]
Saved to: /kaggle/working/submission.csv
Total: 295
Empty RLE: 0
        ImageId                                                rle
0  test_001.jpg  4644 1 4942 4 5242 5 5540 7 5840 7 6139 9 6439...
1  test_002.jpg  65208 10 65928 10 66646 13 67366 15 68086 15 6...
2  test_003.jpg  534965 4 537261 4 539557 4 541853 4 544149 4 5...
3  test_004.jpg  34951 2 35251 3 35550 6 35850 7 36150 10 36449...
4  test_005.jpg  50530 5 50829 7 51127 10 51427 10 51726 13 520...



