# Data Understanding & Preparation

In [1]:
# ============================================================
# STAGE 1 — Data Understanding & Preparation (FINAL LEADERBOARD)
# Purpose:
# - Validate dataset integrity
# - Quantify Dice risk factors (empty / tiny objects)
# - Extract morphology statistics for post-processing
# - Produce data-driven priors for threshold & min-area
# ============================================================

from pathlib import Path
import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm
import re

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TRAIN_IMG_DIR = DATA_ROOT / "train" / "images"
TRAIN_MASK_DIR = DATA_ROOT / "train" / "mask"
TEST_IMG_DIR  = DATA_ROOT / "test" / "images"

IMG_EXTS = {".jpg", ".jpeg", ".png"}

# -----------------------------
# 1. LOAD FILES
# -----------------------------
train_images = sorted([p for p in TRAIN_IMG_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])
train_masks  = sorted([p for p in TRAIN_MASK_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])
test_images  = sorted([p for p in TEST_IMG_DIR.iterdir() if p.suffix.lower() in IMG_EXTS])

print(f"[INFO] Train images : {len(train_images)}")
print(f"[INFO] Train masks  : {len(train_masks)}")
print(f"[INFO] Test images  : {len(test_images)}")

# -----------------------------
# 2. BUILD MASK INDEX
# -----------------------------
def extract_index(name: str):
    m = re.search(r"(\d+)", name)
    return m.group(1) if m else None

mask_index = {}
for m in train_masks:
    idx = extract_index(m.stem)
    if idx is not None:
        mask_index[idx] = m

# -----------------------------
# 3. PAIR IMAGE–MASK
# -----------------------------
pairs = []
for img in train_images:
    idx = extract_index(img.stem)
    if idx in mask_index:
        pairs.append({
            "image_path": img,
            "mask_path": mask_index[idx],
            "id": idx
        })

assert len(pairs) > 0, "No valid image-mask pairs found"
print(f"[INFO] Valid image-mask pairs: {len(pairs)}")

# -----------------------------
# 4. MORPHOLOGY & DICE-RISK ANALYSIS
# -----------------------------
records = []
all_component_areas = []

for p in tqdm(pairs, desc="Analyzing dataset"):
    mask = cv2.imread(str(p["mask_path"]), cv2.IMREAD_GRAYSCALE)
    h, w = mask.shape
    total_pixels = h * w

    bin_mask = (mask == 255).astype(np.uint8)
    pothole_pixels = bin_mask.sum()
    area_ratio = pothole_pixels / total_pixels

    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        bin_mask, connectivity=8
    )

    component_areas = stats[1:, cv2.CC_STAT_AREA] if num_labels > 1 else []
    if len(component_areas) > 0:
        all_component_areas.extend(component_areas.tolist())

    records.append({
        "image": p["image_path"].name,
        "height": h,
        "width": w,
        "has_pothole": int(pothole_pixels > 0),
        "area_ratio": area_ratio,
        "total_pothole_pixels": pothole_pixels,
        "num_components": len(component_areas),
        "max_component_ratio": (
            component_areas.max() / total_pixels if len(component_areas) > 0 else 0.0
        ),
        "min_component_pixels": (
            component_areas.min() if len(component_areas) > 0 else 0
        ),
    })

df = pd.DataFrame(records)

# -----------------------------
# 5. CORE DATASET INSIGHTS
# -----------------------------
print("\n[INSIGHT] Pothole presence distribution:")
print(df["has_pothole"].value_counts())

empty_ratio = (df["has_pothole"] == 0).mean()
print(f"\n[INSIGHT] Empty-mask ratio: {empty_ratio:.2%}")

print("\n[INSIGHT] Pothole area ratio (% of image):")
print(df["area_ratio"].describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]))

print("\n[INSIGHT] Number of components per image:")
print(df["num_components"].describe())

print("\n[INSIGHT] Dominant component ratio:")
print(df["max_component_ratio"].describe())

# -----------------------------
# 6. SMALL-OBJECT RISK (FP KILLER)
# -----------------------------
comp_series = pd.Series(all_component_areas)

print("\n[INSIGHT] Connected component area (pixels):")
print(comp_series.describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9]))

min_area_candidate = int(comp_series.quantile(0.10))
print(f"\n[RECOMMENDATION] Candidate MIN_AREA (remove FP): ~{min_area_candidate} pixels")

# -----------------------------
# 7. DICE FEASIBILITY SIGNAL
# -----------------------------
tiny_image_ratio = (df["area_ratio"] < 0.01).mean()

print("\n[FEASIBILITY CHECK]")
print(f"Images with pothole <1% area: {tiny_image_ratio:.2%}")

if tiny_image_ratio > 0.6:
    feasibility = "HARD (Dice ceiling tight)"
elif tiny_image_ratio > 0.4:
    feasibility = "MODERATE (needs strong post-processing)"
else:
    feasibility = "FAVORABLE (0.80+ achievable)"

print(f"[FEASIBILITY STATUS] {feasibility}")

# -----------------------------
# 8. THRESHOLD PRIOR (DATA-DRIVEN)
# -----------------------------
print("\n[THRESHOLD PRIOR]")
print("Based on small-object dominance:")
print("→ Start sweep in range: 0.30 – 0.45 (Dice-friendly)")

# -----------------------------
# 9. FINAL MANIFEST
# -----------------------------
df_manifest = pd.DataFrame({
    "image_path": [str(p["image_path"]) for p in pairs],
    "mask_path":  [str(p["mask_path"]) for p in pairs],
    "id":         [p["id"] for p in pairs],
})

print(f"\n[INFO] Final training samples: {len(df_manifest)}")

print("\n[STAGE 1 COMPLETE — LEADERBOARD READY]")
print("✓ Dataset validated")
print("✓ Dice risk quantified")
print("✓ Min-area & threshold priors extracted")
print("✓ Ready for STAGE 2 (augmentation design)")


[INFO] Train images : 498
[INFO] Train masks  : 498
[INFO] Test images  : 295
[INFO] Valid image-mask pairs: 498


Analyzing dataset: 100%|██████████| 498/498 [00:12<00:00, 40.52it/s]


[INSIGHT] Pothole presence distribution:
has_pothole
1    498
Name: count, dtype: int64

[INSIGHT] Empty-mask ratio: 0.00%

[INSIGHT] Pothole area ratio (% of image):
count    498.000000
mean       0.134860
std        0.128772
min        0.000235
10%        0.007938
25%        0.040943
50%        0.091678
75%        0.193834
90%        0.329536
max        0.674005
Name: area_ratio, dtype: float64

[INSIGHT] Number of components per image:
count    498.000000
mean       4.261044
std        6.239045
min        1.000000
25%        1.000000
50%        2.000000
75%        5.000000
max       67.000000
Name: num_components, dtype: float64

[INSIGHT] Dominant component ratio:
count    498.000000
mean       0.112599
std        0.119287
min        0.000235
25%        0.030156
50%        0.066428
75%        0.162189
max        0.636689
Name: max_component_ratio, dtype: float64

[INSIGHT] Connected component area (pixels):
count    2.122000e+03
mean     5.588544e+04
std      3.030841e+05
min     




# Preprocessing & Data Augmentation

In [2]:
# ============================================================
# STAGE 2 — Preprocessing & Data Augmentation (FINAL LEADERBOARD)
# Goal:
# - Dice-safe augmentation (no boundary destruction)
# - Robust to lighting, shadow, texture
# - Resolution-aware (512 = context | 640 = boundary)
# - Ensemble-compatible (UNet++ primary)
# ============================================================

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

# -----------------------------
# GLOBAL NORMALIZATION
# -----------------------------
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# ============================================================
# 1. TRAIN AUGMENTATION — 512 (GENERALIZATION / CONTEXT)
# ============================================================
train_transform_512 = A.Compose(
    [
        # resize for context learning
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),

        # geometry (SAFE for road scenes)
        A.HorizontalFlip(p=0.5),
        A.Affine(
            scale=(0.95, 1.08),
            translate_percent=(0.0, 0.04),
            rotate=(-3, 3),
            interpolation=cv2.INTER_LINEAR,
            mode=cv2.BORDER_REFLECT_101,
            p=0.5,
        ),

        # photometric (main generalization driver)
        A.RandomBrightnessContrast(
            brightness_limit=0.20,
            contrast_limit=0.20,
            p=0.7,
        ),
        A.HueSaturationValue(
            hue_shift_limit=6,
            sat_shift_limit=12,
            val_shift_limit=6,
            p=0.35,
        ),

        # realistic road shadow (lower half bias)
        A.RandomShadow(
            shadow_roi=(0, 0.5, 1, 1),
            p=0.25,
        ),

        # VERY mild noise (anti-overfit, Dice-safe)
        A.OneOf(
            [
                A.GaussianBlur(blur_limit=3),
                A.GaussNoise(var_limit=(5.0, 20.0)),
            ],
            p=0.15,
        ),

        # normalize
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

# ============================================================
# 2. TRAIN AUGMENTATION — 640 (BOUNDARY PRECISION)
# ============================================================
train_transform_640 = A.Compose(
    [
        # high-res for boundary refinement
        A.Resize(640, 640, interpolation=cv2.INTER_LINEAR),

        A.HorizontalFlip(p=0.5),
        A.Affine(
            scale=(0.98, 1.05),
            translate_percent=(0.0, 0.03),
            rotate=(-2, 2),
            interpolation=cv2.INTER_LINEAR,
            mode=cv2.BORDER_REFLECT_101,
            p=0.4,
        ),

        # lighter photometric (keep edges clean)
        A.RandomBrightnessContrast(
            brightness_limit=0.15,
            contrast_limit=0.15,
            p=0.6,
        ),
        A.HueSaturationValue(
            hue_shift_limit=4,
            sat_shift_limit=10,
            val_shift_limit=4,
            p=0.25,
        ),

        # shadow allowed but weaker
        A.RandomShadow(
            shadow_roi=(0, 0.5, 1, 1),
            p=0.20,
        ),

        # NO noise here (boundary critical)
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

# ============================================================
# 3. VALIDATION TRANSFORM (STRICT & DETERMINISTIC)
# ============================================================
valid_transform = A.Compose(
    [
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ],
    additional_targets={"mask": "mask"},
)

# ============================================================
# 4. TEST TRANSFORM (MUST MATCH VALIDATION)
# ============================================================
test_transform = A.Compose(
    [
        A.Resize(512, 512, interpolation=cv2.INTER_LINEAR),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ]
)

# ============================================================
# 5. FINAL CHECK
# ============================================================
print("[STAGE 2 COMPLETE — LEADERBOARD READY]")
print("✓ Dice-safe augmentations")
print("✓ 512 = context & robustness")
print("✓ 640 = boundary precision")
print("✓ No destructive transforms")
print("✓ Validation/Test deterministic")
print("✓ Ready for STAGE 3 (model training)")


[STAGE 2 COMPLETE — LEADERBOARD READY]
✓ Dice-safe augmentations
✓ 512 = context & robustness
✓ 640 = boundary precision
✓ No destructive transforms
✓ Validation/Test deterministic
✓ Ready for STAGE 3 (model training)


  A.Affine(
  A.GaussNoise(var_limit=(5.0, 20.0)),
  A.Affine(


# Model Construction & Training

In [3]:
!pip install -q segmentation-models-pytorch==0.3.3 timm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m36.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone


In [5]:
# ============================================================
# STAGE 3 — Model Construction & Training (FINAL MONITORABLE)
# - UNet++ primary
# - DeepLabV3+ support
# - REAL train / val split
# - Train loss + Val Dice logging
# ============================================================

import os, re, random
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR

import segmentation_models_pytorch as smp
from sklearn.model_selection import train_test_split

# -----------------------------
# SEED & DEVICE
# -----------------------------
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# -----------------------------
# DATA
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TRAIN_IMG_DIR = DATA_ROOT / "train/images"
TRAIN_MASK_DIR = DATA_ROOT / "train/mask"

def extract_idx(name):
    return re.search(r"(\d+)", name).group(1)

pairs = []
for img in TRAIN_IMG_DIR.iterdir():
    idx = extract_idx(img.name)
    mask = TRAIN_MASK_DIR / f"mask_{idx}.png"
    if mask.exists():
        pairs.append((str(img), str(mask)))

df = pd.DataFrame(pairs, columns=["image_path", "mask_path"])
print("Total samples:", len(df))

df_train, df_val = train_test_split(
    df, test_size=0.15, random_state=SEED, shuffle=True
)

print("Train:", len(df_train), "| Val:", len(df_val))

# -----------------------------
# DATASET
# -----------------------------
class PotholeDataset(Dataset):
    def __init__(self, df, transform):
        self.df = df.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img = cv2.imread(self.df.loc[idx, "image_path"])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.df.loc[idx, "mask_path"], cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype("float32")

        aug = self.transform(image=img, mask=mask)
        return aug["image"], aug["mask"].unsqueeze(0)

# -----------------------------
# METRIC
# -----------------------------
def dice_coef(prob, target, thr=0.4, eps=1e-7):
    pred = (prob > thr).float()
    inter = (pred * target).sum(dim=(2,3))
    union = pred.sum(dim=(2,3)) + target.sum(dim=(2,3))
    return ((2 * inter + eps) / (union + eps)).mean()

# -----------------------------
# LOSSES
# -----------------------------
dice_loss = smp.losses.DiceLoss(mode="binary", from_logits=True)
focal_loss = smp.losses.FocalLoss(mode="binary", gamma=2.0)

# -----------------------------
# MODEL FACTORY
# -----------------------------
def build_model(name):
    if name == "unetpp":
        return smp.UnetPlusPlus(
            encoder_name="efficientnet-b4",
            encoder_weights="imagenet",
            in_channels=3,
            classes=1,
        )
    if name == "deeplab":
        return smp.DeepLabV3Plus(
            encoder_name="resnet101",
            encoder_weights="imagenet",
            in_channels=3,
            classes=1,
        )

# -----------------------------
# TRAIN FUNCTION
# -----------------------------
def train_one_model(name, max_epoch):

    print(f"\n===== TRAINING {name.upper()} =====")
    model = build_model(name).to(device)

    optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    scheduler = CosineAnnealingLR(optimizer, T_max=max_epoch)

    train_loader = DataLoader(
        PotholeDataset(df_train, train_transform_512),
        batch_size=4,
        shuffle=True,
        num_workers=2,
        pin_memory=True
    )

    val_loader = DataLoader(
        PotholeDataset(df_val, valid_transform),
        batch_size=4,
        shuffle=False,
        num_workers=2,
        pin_memory=True
    )

    best_val = 0.0

    for epoch in range(max_epoch):
        model.train()
        total_loss = 0.0

        for imgs, masks in tqdm(train_loader, desc=f"{name} | Epoch {epoch+1}"):
            imgs, masks = imgs.to(device), masks.to(device)
            optimizer.zero_grad()

            logits = model(imgs)
            loss = dice_loss(logits, masks) + 0.5 * focal_loss(logits, masks)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        scheduler.step()
        avg_loss = total_loss / len(train_loader)

        # ---------------- VALIDATION ----------------
        model.eval()
        dices = []

        with torch.no_grad():
            for imgs, masks in val_loader:
                imgs, masks = imgs.to(device), masks.to(device)
                prob = torch.sigmoid(model(imgs))
                dices.append(dice_coef(prob, masks).item())

        val_dice = float(np.mean(dices))

        print(
            f"{name} | Epoch {epoch+1:02d} | "
            f"TrainLoss {avg_loss:.4f} | ValDice {val_dice:.4f}"
        )

        if val_dice > best_val:
            best_val = val_dice
            torch.save(model.state_dict(), f"/kaggle/working/best_{name}.pt")
            print(f">> Best {name} saved")

    print(f"[DONE] {name} best Val Dice: {best_val:.4f}")

# -----------------------------
# RUN TRAINING
# -----------------------------
train_one_model("unetpp", max_epoch=25)
train_one_model("deeplab", max_epoch=18)

print("\n[STAGE 3 COMPLETE — MONITORABLE & LEADERBOARD SAFE]")

Device: cuda
Total samples: 498
Train: 423 | Val: 75

===== TRAINING UNETPP =====


unetpp | Epoch 1: 100%|██████████| 106/106 [01:18<00:00,  1.34it/s]


unetpp | Epoch 01 | TrainLoss 0.7843 | ValDice 0.5355
>> Best unetpp saved


unetpp | Epoch 2: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 02 | TrainLoss 0.5922 | ValDice 0.6452
>> Best unetpp saved


unetpp | Epoch 3: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 03 | TrainLoss 0.4829 | ValDice 0.6659
>> Best unetpp saved


unetpp | Epoch 4: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 04 | TrainLoss 0.4059 | ValDice 0.6755
>> Best unetpp saved


unetpp | Epoch 5: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 05 | TrainLoss 0.3554 | ValDice 0.6768
>> Best unetpp saved


unetpp | Epoch 6: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 06 | TrainLoss 0.3219 | ValDice 0.6830
>> Best unetpp saved


unetpp | Epoch 7: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 07 | TrainLoss 0.2949 | ValDice 0.6925
>> Best unetpp saved


unetpp | Epoch 8: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 08 | TrainLoss 0.2687 | ValDice 0.7009
>> Best unetpp saved


unetpp | Epoch 9: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 09 | TrainLoss 0.2460 | ValDice 0.7255
>> Best unetpp saved


unetpp | Epoch 10: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 10 | TrainLoss 0.2262 | ValDice 0.7346
>> Best unetpp saved


unetpp | Epoch 11: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 11 | TrainLoss 0.2126 | ValDice 0.7313


unetpp | Epoch 12: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 12 | TrainLoss 0.2157 | ValDice 0.7379
>> Best unetpp saved


unetpp | Epoch 13: 100%|██████████| 106/106 [01:18<00:00,  1.35it/s]


unetpp | Epoch 13 | TrainLoss 0.1945 | ValDice 0.7303


unetpp | Epoch 14: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 14 | TrainLoss 0.1840 | ValDice 0.7496
>> Best unetpp saved


unetpp | Epoch 15: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 15 | TrainLoss 0.1887 | ValDice 0.7514
>> Best unetpp saved


unetpp | Epoch 16: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 16 | TrainLoss 0.1817 | ValDice 0.7548
>> Best unetpp saved


unetpp | Epoch 17: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 17 | TrainLoss 0.1731 | ValDice 0.7530


unetpp | Epoch 18: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 18 | TrainLoss 0.1583 | ValDice 0.7654
>> Best unetpp saved


unetpp | Epoch 19: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 19 | TrainLoss 0.1655 | ValDice 0.7600


unetpp | Epoch 20: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 20 | TrainLoss 0.1527 | ValDice 0.7587


unetpp | Epoch 21: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 21 | TrainLoss 0.1764 | ValDice 0.7573


unetpp | Epoch 22: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 22 | TrainLoss 0.1638 | ValDice 0.7549


unetpp | Epoch 23: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 23 | TrainLoss 0.1647 | ValDice 0.7567


unetpp | Epoch 24: 100%|██████████| 106/106 [01:17<00:00,  1.37it/s]


unetpp | Epoch 24 | TrainLoss 0.1631 | ValDice 0.7561


unetpp | Epoch 25: 100%|██████████| 106/106 [01:17<00:00,  1.36it/s]


unetpp | Epoch 25 | TrainLoss 0.1644 | ValDice 0.7551
[DONE] unetpp best Val Dice: 0.7654

===== TRAINING DEEPLAB =====


deeplab | Epoch 1: 100%|██████████| 106/106 [00:59<00:00,  1.79it/s]


deeplab | Epoch 01 | TrainLoss 0.6911 | ValDice 0.5120
>> Best deeplab saved


deeplab | Epoch 2: 100%|██████████| 106/106 [00:58<00:00,  1.81it/s]


deeplab | Epoch 02 | TrainLoss 0.5613 | ValDice 0.6063
>> Best deeplab saved


deeplab | Epoch 3: 100%|██████████| 106/106 [00:58<00:00,  1.81it/s]


deeplab | Epoch 03 | TrainLoss 0.4853 | ValDice 0.6304
>> Best deeplab saved


deeplab | Epoch 4: 100%|██████████| 106/106 [00:58<00:00,  1.82it/s]


deeplab | Epoch 04 | TrainLoss 0.4179 | ValDice 0.6462
>> Best deeplab saved


deeplab | Epoch 5: 100%|██████████| 106/106 [00:58<00:00,  1.82it/s]


deeplab | Epoch 05 | TrainLoss 0.3703 | ValDice 0.6488
>> Best deeplab saved


deeplab | Epoch 6: 100%|██████████| 106/106 [00:58<00:00,  1.80it/s]


deeplab | Epoch 06 | TrainLoss 0.3540 | ValDice 0.6255


deeplab | Epoch 7: 100%|██████████| 106/106 [00:58<00:00,  1.82it/s]


deeplab | Epoch 07 | TrainLoss 0.3194 | ValDice 0.6673
>> Best deeplab saved


deeplab | Epoch 8: 100%|██████████| 106/106 [00:58<00:00,  1.81it/s]


deeplab | Epoch 08 | TrainLoss 0.2864 | ValDice 0.6897
>> Best deeplab saved


deeplab | Epoch 9: 100%|██████████| 106/106 [00:58<00:00,  1.80it/s]


deeplab | Epoch 09 | TrainLoss 0.2632 | ValDice 0.6801


deeplab | Epoch 10: 100%|██████████| 106/106 [00:58<00:00,  1.81it/s]


deeplab | Epoch 10 | TrainLoss 0.2531 | ValDice 0.6723


deeplab | Epoch 11: 100%|██████████| 106/106 [00:58<00:00,  1.81it/s]


deeplab | Epoch 11 | TrainLoss 0.2249 | ValDice 0.7083
>> Best deeplab saved


deeplab | Epoch 12: 100%|██████████| 106/106 [00:58<00:00,  1.81it/s]


deeplab | Epoch 12 | TrainLoss 0.2213 | ValDice 0.7077


deeplab | Epoch 13: 100%|██████████| 106/106 [00:59<00:00,  1.80it/s]


deeplab | Epoch 13 | TrainLoss 0.1978 | ValDice 0.7231
>> Best deeplab saved


deeplab | Epoch 14: 100%|██████████| 106/106 [00:58<00:00,  1.82it/s]


deeplab | Epoch 14 | TrainLoss 0.1799 | ValDice 0.7254
>> Best deeplab saved


deeplab | Epoch 15: 100%|██████████| 106/106 [00:58<00:00,  1.80it/s]


deeplab | Epoch 15 | TrainLoss 0.1808 | ValDice 0.7188


deeplab | Epoch 16: 100%|██████████| 106/106 [00:58<00:00,  1.80it/s]


deeplab | Epoch 16 | TrainLoss 0.1779 | ValDice 0.7147


deeplab | Epoch 17: 100%|██████████| 106/106 [00:58<00:00,  1.80it/s]


deeplab | Epoch 17 | TrainLoss 0.1753 | ValDice 0.7158


deeplab | Epoch 18: 100%|██████████| 106/106 [00:59<00:00,  1.79it/s]


deeplab | Epoch 18 | TrainLoss 0.1796 | ValDice 0.7320
>> Best deeplab saved
[DONE] deeplab best Val Dice: 0.7320

[STAGE 3 COMPLETE — MONITORABLE & LEADERBOARD SAFE]


# Optimization, Validation & Refinement

In [7]:
# ============================================================
# STAGE 4 — Ensemble Optimization & Refinement (FINAL)
# - Fully aligned with STAGE 3
# - UNet++ + DeepLabV3+
# - REAL validation (no proxy, no leakage)
# - Dice-faithful (empty pred = Dice 0)
# ============================================================

!pip install -q optuna

import optuna
import numpy as np
import torch
import cv2
from tqdm import tqdm
from pathlib import Path

import segmentation_models_pytorch as smp
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# -----------------------------
# DEVICE
# -----------------------------
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# ============================================================
# DATA (SAME AS STAGE 3)
# ============================================================
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TRAIN_IMG_DIR = DATA_ROOT / "train/images"
TRAIN_MASK_DIR = DATA_ROOT / "train/mask"

def extract_idx(name):
    import re
    return re.search(r"(\d+)", name).group(1)

pairs = []
for img in TRAIN_IMG_DIR.iterdir():
    idx = extract_idx(img.name)
    mask = TRAIN_MASK_DIR / f"mask_{idx}.png"
    if mask.exists():
        pairs.append((str(img), str(mask)))

df = np.array(pairs, dtype=object)

# ============================================================
# DATASET (IDENTICAL LOGIC TO STAGE 3)
# ============================================================
class PotholeDataset(Dataset):
    def __init__(self, pairs, transform):
        self.pairs = pairs
        self.transform = transform

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img_path, mask_path = self.pairs[idx]
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        mask = (mask == 255).astype("float32")
        aug = self.transform(image=img, mask=mask)
        return aug["image"], aug["mask"].unsqueeze(0)

# ============================================================
# TRANSFORM (MATCH STAGE 3 VALID)
# ============================================================
import albumentations as A
from albumentations.pytorch import ToTensorV2

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

valid_transform = A.Compose([
    A.Resize(512, 512),
    A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ToTensorV2(),
])

# ============================================================
# VALIDATION SPLIT (REAL, LEAK-SAFE)
# ============================================================
_, val_pairs = train_test_split(
    df, test_size=0.15, random_state=42
)

val_loader = DataLoader(
    PotholeDataset(val_pairs, valid_transform),
    batch_size=4,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

print("[INFO] Validation samples:", len(val_pairs))

# ============================================================
# LOAD TRAINED MODELS (FROM STAGE 3)
# ============================================================
unetpp = smp.UnetPlusPlus(
    encoder_name="efficientnet-b4",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

deeplab = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

unetpp.load_state_dict(torch.load("/kaggle/working/best_unetpp.pt", map_location=DEVICE))
deeplab.load_state_dict(torch.load("/kaggle/working/best_deeplab.pt", map_location=DEVICE))

unetpp.eval()
deeplab.eval()

print("[INFO] Models loaded: UNet++ + DeepLabV3+")

# ============================================================
# METRICS & POSTPROCESS
# ============================================================
def dice_score(pred, target, eps=1e-7):
    inter = (pred * target).sum()
    union = pred.sum() + target.sum()
    return (2 * inter + eps) / (union + eps)

def remove_small_objects(mask, min_area):
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8
    )
    clean = np.zeros_like(mask, dtype=np.uint8)
    for i in range(1, num_labels):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            clean[labels == i] = 1
    return clean

def normalize_prob(p):
    return np.clip(p, 1e-6, 1 - 1e-6)

# ============================================================
# OPTUNA OBJECTIVE (DICE-FAITHFUL)
# ============================================================
def objective(trial):

    # UNet++ dominant ensemble
    w_u = trial.suggest_float("w_unetpp", 0.65, 0.85)
    w_d = trial.suggest_float("w_deeplab", 0.15, 0.35)

    s = w_u + w_d
    w_u, w_d = w_u / s, w_d / s

    threshold = trial.suggest_float("threshold", 0.30, 0.45)
    min_area  = trial.suggest_int("min_area", 100, 400, step=20)

    dices = []

    with torch.no_grad():
        for imgs, masks in val_loader:
            imgs = imgs.to(DEVICE)
            gt = masks.numpy()

            pu = torch.sigmoid(unetpp(imgs)).cpu().numpy()
            pd = torch.sigmoid(deeplab(imgs)).cpu().numpy()

            pu = normalize_prob(pu)
            pd = normalize_prob(pd)

            prob = w_u * pu + w_d * pd

            for i in range(prob.shape[0]):
                pred = (prob[i, 0] > threshold).astype(np.uint8)
                pred = remove_small_objects(pred, min_area)

                # empty pred = Dice 0 (NO optimistic bias)
                dices.append(dice_score(pred, gt[i, 0]))

    return float(np.mean(dices))

# ============================================================
# RUN OPTUNA
# ============================================================
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=40, show_progress_bar=True)

best = study.best_params
best_dice = study.best_value

# normalize weights
ws = best["w_unetpp"] + best["w_deeplab"]
best["w_unetpp"] /= ws
best["w_deeplab"] /= ws

print("\n[OPTUNA BEST CONFIG — STAGE 3 CONSISTENT]")
for k, v in best.items():
    print(f"{k}: {v}")
print(f"Validation Dice: {best_dice:.4f}")

# ============================================================
# EXPORT CONFIG FOR STAGE 5
# ============================================================
OPT_CONFIG = {
    "weights": {
        "unetpp": best["w_unetpp"],
        "deeplab": best["w_deeplab"],
    },
    "threshold": best["threshold"],
    "min_area": best["min_area"],
}

print("\n[STAGE 4 COMPLETE — READY FOR STAGE 5]")
print("✓ Real validation Dice")
print("✓ No SegFormer")
print("✓ No proxy / no leakage")
print("✓ Leaderboard-safe")

Device: cuda
[INFO] Validation samples: 75


[I 2026-02-05 11:52:21,956] A new study created in memory with name: no-name-55b229a9-4633-4dde-aba5-2f91fef56cfc


[INFO] Models loaded: UNet++ + DeepLabV3+


  0%|          | 0/40 [00:00<?, ?it/s]

[I 2026-02-05 11:52:29,859] Trial 0 finished with value: 0.7630795607907076 and parameters: {'w_unetpp': 0.8157018988285771, 'w_deeplab': 0.21686005128799274, 'threshold': 0.35106936478959466, 'min_area': 140}. Best is trial 0 with value: 0.7630795607907076.
[I 2026-02-05 11:52:37,975] Trial 1 finished with value: 0.7331048649361702 and parameters: {'w_unetpp': 0.7685208564160664, 'w_deeplab': 0.33163365663022015, 'threshold': 0.40943052897140153, 'min_area': 320}. Best is trial 0 with value: 0.7630795607907076.
[I 2026-02-05 11:52:46,060] Trial 2 finished with value: 0.73219283841423 and parameters: {'w_unetpp': 0.7981498469094792, 'w_deeplab': 0.3054457478906737, 'threshold': 0.32526222999727117, 'min_area': 360}. Best is trial 0 with value: 0.7630795607907076.
[I 2026-02-05 11:52:53,835] Trial 3 finished with value: 0.7317249020622909 and parameters: {'w_unetpp': 0.6588169067323288, 'w_deeplab': 0.22971907026192567, 'threshold': 0.32179944239061287, 'min_area': 340}. Best is trial 0

# Inference, Encoding & Submission

In [10]:
# ============================================================
# STAGE 5 — Ensemble Inference, RLE Encoding & Submission
# FINAL LEADERBOARD (ANTI-ZONK, 2-MODEL, FIXED)
# ============================================================

import numpy as np
import pandas as pd
import torch
import cv2
from pathlib import Path
from tqdm import tqdm

import segmentation_models_pytorch as smp

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TEST_IMG_DIR = DATA_ROOT / "test" / "images"
SAMPLE_SUB = Path("/kaggle/input/data-science-ara-7-0/sample_submission.csv")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- from STAGE 4 ---
W_U = OPT_CONFIG["weights"]["unetpp"]
W_D = OPT_CONFIG["weights"]["deeplab"]
THRESHOLD = OPT_CONFIG["threshold"]
MIN_AREA = OPT_CONFIG["min_area"]

INPUT_SIZE = 512

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

# -----------------------------
# LOAD MODELS
# -----------------------------
unetpp = smp.UnetPlusPlus(
    encoder_name="efficientnet-b4",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

deeplab = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

unetpp.load_state_dict(torch.load("/kaggle/working/best_unetpp.pt", map_location=DEVICE))
deeplab.load_state_dict(torch.load("/kaggle/working/best_deeplab.pt", map_location=DEVICE))

unetpp.eval()
deeplab.eval()

print("[INFO] Models loaded: UNet++ + DeepLabV3+")

# -----------------------------
# RLE ENCODER (OFFICIAL)
# -----------------------------
def encode_rle(mask: np.ndarray) -> str:
    binary = (mask == 1).astype(np.uint8)
    pixels = binary.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[0::2]
    return " ".join(str(x) for x in runs)

# -----------------------------
# POST-PROCESS
# -----------------------------
def remove_small_objects(mask, min_area):
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8
    )
    clean = np.zeros_like(mask, dtype=np.uint8)
    for i in range(1, num_labels):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            clean[labels == i] = 1
    return clean

# -----------------------------
# LOAD TEST FILES
# -----------------------------
test_images = sorted(TEST_IMG_DIR.glob("*.jpg"))
print("[INFO] Test images:", len(test_images))

# -----------------------------
# ENSEMBLE INFERENCE + H-FLIP TTA
# -----------------------------
records = []

with torch.no_grad():
    for img_path in tqdm(test_images, desc="Final Ensemble Inference"):
        img_name = img_path.name

        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h0, w0 = img.shape[:2]

        # --- preprocess ---
        img_r = cv2.resize(img, (INPUT_SIZE, INPUT_SIZE)).astype("float32") / 255.0
        for c in range(3):
            img_r[..., c] = (img_r[..., c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]

        x = torch.from_numpy(img_r.transpose(2,0,1)).unsqueeze(0).to(DEVICE)
        x_flip = torch.flip(x, dims=[3])

        # --- forward ---
        p_u = torch.sigmoid(unetpp(x))
        p_d = torch.sigmoid(deeplab(x))

        p_u_f = torch.flip(torch.sigmoid(unetpp(x_flip)), dims=[3])
        p_d_f = torch.flip(torch.sigmoid(deeplab(x_flip)), dims=[3])

        # --- ensemble (torch) ---
        p_u = (p_u + p_u_f) / 2.0
        p_d = (p_d + p_d_f) / 2.0

        prob = (W_U * p_u + W_D * p_d)[0, 0]

        # --- to numpy ---
        prob = prob.cpu().numpy()

        pred = (prob > THRESHOLD).astype(np.uint8)
        pred = remove_small_objects(pred, MIN_AREA)
        pred = cv2.resize(pred, (w0, h0), interpolation=cv2.INTER_NEAREST)

        rle = "" if pred.sum() == 0 else encode_rle(pred)

        records.append({
            "ImageId": img_name,
            "rle": rle
        })

# -----------------------------
# BUILD SUBMISSION
# -----------------------------
df_sub = pd.DataFrame(records)
df_sample = pd.read_csv(SAMPLE_SUB)
df_sub = df_sub[df_sample.columns.tolist()]

OUT_SUB = "/kaggle/working/submission.csv"
df_sub.to_csv(OUT_SUB, index=False)

print("\n[STAGE 5 COMPLETE — SUBMISSION READY]")
print("Saved to:", OUT_SUB)
print("Rows:", len(df_sub))
print("Empty RLE:", (df_sub["rle"] == "").sum())
print(df_sub.head())


[INFO] Models loaded: UNet++ + DeepLabV3+
[INFO] Test images: 295


Final Ensemble Inference: 100%|██████████| 295/295 [01:00<00:00,  4.91it/s]



[STAGE 5 COMPLETE — SUBMISSION READY]
Saved to: /kaggle/working/submission.csv
Rows: 295
Empty RLE: 0
        ImageId                                                rle
0  test_001.jpg  4942 4 5242 4 5540 7 5840 7 6139 9 6439 9 6738...
1  test_002.jpg  106941 6 107661 6 108381 6 109100 8 109820 10 ...
2  test_003.jpg  1102019 13 1104315 13 1106611 13 1108907 13 11...
3  test_004.jpg  13620 2 13918 5 14218 7 14517 8 14818 8 15118 ...
4  test_005.jpg  40018 2 40318 3 40617 4 40915 7 41215 7 41515 ...
