# Data Understanding & Preparation

In [1]:
# ============================================================
# STAGE 1 — Data Understanding & Preparation (FINAL FIX)
# Data Science ARA 7.0 — Pothole Segmentation
# Compatible with index-based filenames:
#   train_XXX.jpg <-> mask_XXX.png
# ============================================================

from pathlib import Path
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
from skimage.measure import label, regionprops

# -------------------------------
# Paths (Kaggle)
# -------------------------------
ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")

TRAIN_IMG_DIR = ROOT / "train" / "images"
TRAIN_MSK_DIR = ROOT / "train" / "mask"
TEST_IMG_DIR  = ROOT / "test"  / "images"

assert TRAIN_IMG_DIR.exists()
assert TRAIN_MSK_DIR.exists()
assert TEST_IMG_DIR.exists()

# -------------------------------
# Load file lists
# -------------------------------
train_imgs = sorted(TRAIN_IMG_DIR.glob("train_*.jpg"))
train_msks = sorted(TRAIN_MSK_DIR.glob("mask_*.png"))
test_imgs  = sorted(TEST_IMG_DIR.glob("test_*.jpg"))

print(f"[INFO] Train images : {len(train_imgs)}")
print(f"[INFO] Train masks  : {len(train_msks)}")
print(f"[INFO] Test images  : {len(test_imgs)}")

assert len(train_imgs) == len(train_msks), "Mismatch train image & mask count"

# -------------------------------
# Pairing by index
# -------------------------------
pairs = list(zip(train_imgs, train_msks))
print(f"[INFO] Valid image-mask pairs: {len(pairs)}")

# -------------------------------
# Profiling
# -------------------------------
image_records = []
component_records = []

for img_path, msk_path in tqdm(pairs, desc="Analyzing dataset"):
    mask = cv2.imread(str(msk_path), cv2.IMREAD_GRAYSCALE)
    h, w = mask.shape
    total_pixels = h * w

    bin_mask = (mask > 0).astype(np.uint8)
    pos_pixels = bin_mask.sum()
    area_ratio = pos_pixels / total_pixels

    lbl = label(bin_mask)
    regions = regionprops(lbl)

    num_components = len(regions)
    comp_areas = [r.area for r in regions]

    max_comp_ratio = max(comp_areas) / total_pixels if comp_areas else 0

    image_records.append({
        "image_id": img_path.name,
        "area_ratio": area_ratio,
        "num_components": num_components,
        "max_component_ratio": max_comp_ratio
    })

    for r in regions:
        component_records.append({
            "image_id": img_path.name,
            "component_area": r.area,
            "component_ratio": r.area / total_pixels
        })

df_img = pd.DataFrame(image_records)
df_comp = pd.DataFrame(component_records)

# -------------------------------
# INSIGHTS
# -------------------------------
print("\n[INSIGHT] Pothole presence distribution:")
print((df_img["area_ratio"] > 0).value_counts())

empty_ratio = (df_img["area_ratio"] == 0).mean()
print(f"\n[INSIGHT] Empty-mask ratio: {empty_ratio:.2%}")

print("\n[INSIGHT] Pothole area ratio (% of image):")
print(df_img["area_ratio"].describe())

print("\n[INSIGHT] Number of components per image:")
print(df_img["num_components"].describe())

print("\n[INSIGHT] Dominant component ratio:")
print(df_img["max_component_ratio"].describe())

print("\n[INSIGHT] Connected component area (pixels):")
print(df_comp["component_area"].describe())

# -------------------------------
# Leaderboard-oriented priors
# -------------------------------
min_area_prior = int(df_comp["component_area"].quantile(0.10))
print(f"\n[RECOMMENDATION] Candidate MIN_AREA (remove FP): ~{min_area_prior} pixels")

small_obj_ratio = (df_img["area_ratio"] < 0.01).mean()
print("\n[FEASIBILITY CHECK]")
print(f"Images with pothole <1% area: {small_obj_ratio:.2%}")

if small_obj_ratio < 0.20:
    print("[FEASIBILITY STATUS] FAVORABLE (0.80+ achievable)")
else:
    print("[FEASIBILITY STATUS] HARD (small-object dominant)")

print("\n[THRESHOLD PRIOR]")
print("→ Start sweep in range: 0.30 – 0.45 (Dice-friendly)")

print("\n[INFO] Final training samples:", len(df_img))

print("\n[STAGE 1 COMPLETE — LEADERBOARD READY]")
print("✓ Dataset validated")
print("✓ Dice risk quantified")
print("✓ Min-area & threshold priors extracted")
print("✓ Ready for STAGE 2 (augmentation design)")


[INFO] Train images : 498
[INFO] Train masks  : 498
[INFO] Test images  : 295
[INFO] Valid image-mask pairs: 498


Analyzing dataset: 100%|██████████| 498/498 [00:29<00:00, 16.95it/s]


[INSIGHT] Pothole presence distribution:
area_ratio
True    498
Name: count, dtype: int64

[INSIGHT] Empty-mask ratio: 0.00%

[INSIGHT] Pothole area ratio (% of image):
count    498.000000
mean       0.134860
std        0.128772
min        0.000235
25%        0.040943
50%        0.091678
75%        0.193834
max        0.674005
Name: area_ratio, dtype: float64

[INSIGHT] Number of components per image:
count    498.000000
mean       4.261044
std        6.239045
min        1.000000
25%        1.000000
50%        2.000000
75%        5.000000
max       67.000000
Name: num_components, dtype: float64

[INSIGHT] Dominant component ratio:
count    498.000000
mean       0.112599
std        0.119287
min        0.000235
25%        0.030156
50%        0.066428
75%        0.162189
max        0.636689
Name: max_component_ratio, dtype: float64

[INSIGHT] Connected component area (pixels):
count    2.122000e+03
mean     5.588544e+04
std      3.030841e+05
min      1.000000e+00
25%      3.930000e+02
50




# Preprocessing & Data Augmentation

In [2]:
# ===============================
# Imports
# ===============================
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
import random

# ===============================
# Global Config
# ===============================
IMG_SIZE = 512
POS_VALUE = 255

NEGATIVE_CROP_PROB = 0.3   # 30% background-only crop

# ===============================
# Mask Sanity
# ===============================
def sanitize_mask(mask: np.ndarray) -> np.ndarray:
    return (mask > 0).astype(np.uint8) * POS_VALUE

# ===============================
# TRAIN AUGMENTATION (CROP-BASED)
# ===============================
train_tfms = A.Compose(
    [
        # --- crop instead of resize ---
        A.RandomCrop(IMG_SIZE, IMG_SIZE),

        # --- geometry ---
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.2),
        A.Rotate(limit=10, p=0.3),

        # --- illumination ---
        A.RandomBrightnessContrast(
            brightness_limit=0.2,
            contrast_limit=0.2,
            p=0.5
        ),
        A.RandomGamma(
            gamma_limit=(80, 120),
            p=0.3
        ),

        # --- texture robustness ---
        A.GaussianBlur(blur_limit=(3, 5), p=0.15),
        A.GaussNoise(p=0.15),

        # --- normalize ---
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

# ===============================
# VAL / TEST AUGMENTATION
# ===============================
val_tfms = A.Compose(
    [
        A.Resize(IMG_SIZE, IMG_SIZE),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)

# ===============================
# Dataset (NEGATIVE-AWARE)
# ===============================
class PotholeDataset:
    def __init__(self, image_paths, mask_paths, transforms):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transforms = transforms

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = cv2.imread(str(self.image_paths[idx]))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        mask = cv2.imread(str(self.mask_paths[idx]), cv2.IMREAD_GRAYSCALE)
        mask = sanitize_mask(mask)

        # ---- NEGATIVE CROP LOGIC ----
        if random.random() < NEGATIVE_CROP_PROB:
            # force background-only crop
            h, w = mask.shape
            for _ in range(10):
                y = random.randint(0, h - IMG_SIZE)
                x = random.randint(0, w - IMG_SIZE)
                patch = mask[y:y+IMG_SIZE, x:x+IMG_SIZE]
                if patch.sum() == 0:
                    img = img[y:y+IMG_SIZE, x:x+IMG_SIZE]
                    mask = patch
                    break

        augmented = self.transforms(image=img, mask=mask)
        img = augmented["image"]
        mask = augmented["mask"] / POS_VALUE

        return img, mask.float()

# ===============================
# Sanity Check
# ===============================
if __name__ == "__main__":
    print("[STAGE 2 CHECK — REVISED]")
    dummy_img = np.zeros((1024, 1024, 3), dtype=np.uint8)
    dummy_mask = np.zeros((1024, 1024), dtype=np.uint8)

    out = train_tfms(image=dummy_img, mask=dummy_mask)
    print("✓ Train transform OK")

    out = val_tfms(image=dummy_img)
    print("✓ Val/Test transform OK")

    print("[STAGE 2 REVISED — READY FOR STAGE 3]")


[STAGE 2 CHECK — REVISED]
✓ Train transform OK
✓ Val/Test transform OK
[STAGE 2 REVISED — READY FOR STAGE 3]


# Model Construction & Training

In [3]:
!pip install -q segmentation-models-pytorch==0.3.3 timm

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.5/68.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Building wheel for pretrainedmodels (setup.py) ... [?25l[?25hdone


In [4]:
# ===============================
# Imports
# ===============================
import random
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.amp import autocast, GradScaler

import segmentation_models_pytorch as smp
import cv2

# ===============================
# Config
# ===============================
SEED = 42
BATCH_SIZE = 4
EPOCHS = 30
LR = 2e-4
WEIGHT_DECAY = 1e-2
VAL_RATIO = 0.15
ACCUM_STEPS = 2        # gradient accumulation
IMG_SIZE = 512

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

# ===============================
# Seed
# ===============================
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# ===============================
# Dataset (SAFE CROP)
# ===============================
class PotholeDataset:
    def __init__(self, image_paths, mask_paths=None, transforms=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transforms = transforms

    def __len__(self):
        return len(self.image_paths)

    def random_crop_safe(self, img, mask=None, size=512):
        h, w = img.shape[:2]

        if h >= size and w >= size:
            y = random.randint(0, h - size)
            x = random.randint(0, w - size)
            img = img[y:y+size, x:x+size]
            if mask is not None:
                mask = mask[y:y+size, x:x+size]
        else:
            img = cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)
            if mask is not None:
                mask = cv2.resize(mask, (size, size), interpolation=cv2.INTER_NEAREST)

        return img, mask

    def __getitem__(self, idx):
        img = cv2.imread(str(self.image_paths[idx]))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.mask_paths is not None:
            mask = cv2.imread(str(self.mask_paths[idx]), cv2.IMREAD_GRAYSCALE)
            mask = (mask > 0).astype(np.uint8) * 255

            img, mask = self.random_crop_safe(img, mask, IMG_SIZE)

            augmented = self.transforms(image=img, mask=mask)
            img = augmented["image"]
            mask = augmented["mask"] / 255.0

            return img, mask.float()
        else:
            img, _ = self.random_crop_safe(img, None, IMG_SIZE)
            augmented = self.transforms(image=img)
            return augmented["image"]

# ===============================
# Train / Val Split
# ===============================
idx_all = np.arange(len(train_imgs))
idx_tr, idx_val = train_test_split(
    idx_all,
    test_size=VAL_RATIO,
    random_state=SEED,
    shuffle=True
)

train_imgs_tr = [train_imgs[i] for i in idx_tr]
train_msks_tr = [train_msks[i] for i in idx_tr]
val_imgs_tr   = [train_imgs[i] for i in idx_val]
val_msks_tr   = [train_msks[i] for i in idx_val]

print(f"[INFO] Train: {len(train_imgs_tr)} | Val: {len(val_imgs_tr)}")

# ===============================
# Datasets
# ===============================
train_dataset = PotholeDataset(
    train_imgs_tr,
    train_msks_tr,
    transforms=train_tfms
)

val_dataset = PotholeDataset(
    val_imgs_tr,
    val_msks_tr,
    transforms=val_tfms
)

# ===============================
# DataLoaders
# ===============================
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

# ===============================
# Loss (SMALL OBJECT FRIENDLY)
# ===============================
dice_loss = smp.losses.DiceLoss(mode="binary", from_logits=True)
focal_loss = smp.losses.FocalLoss(
    mode="binary",
    alpha=0.75,
    gamma=2.0,
    normalized=True
)

def criterion(logits, targets):
    return dice_loss(logits, targets) + 0.5 * focal_loss(logits, targets)

# ===============================
# Dice Metric (SOFT)
# ===============================
@torch.no_grad()
def dice_soft(probs, targets, eps=1e-7):
    inter = (probs * targets).sum()
    union = probs.sum() + targets.sum()
    return (2 * inter + eps) / (union + eps)

# ===============================
# Models
# ===============================
def build_unetpp():
    return smp.UnetPlusPlus(
        encoder_name="efficientnet-b4",
        encoder_weights="imagenet",
        in_channels=3,
        classes=1,
    )

def build_deeplab():
    return smp.DeepLabV3Plus(
        encoder_name="resnet101",
        encoder_weights="imagenet",
        in_channels=3,
        classes=1,
    )

# ===============================
# Train / Val loops
# ===============================
def train_one_epoch(model, loader, optimizer, scaler, epoch, name):
    model.train()
    total_loss = 0.0
    optimizer.zero_grad()

    pbar = tqdm(loader, desc=f"{name} | Epoch {epoch}", leave=False)

    for step, (imgs, masks) in enumerate(pbar):
        imgs = imgs.to(DEVICE)
        masks = masks.to(DEVICE).unsqueeze(1)

        with autocast(device_type="cuda"):
            logits = model(imgs)
            loss = criterion(logits, masks) / ACCUM_STEPS

        scaler.scale(loss).backward()

        if (step + 1) % ACCUM_STEPS == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        total_loss += loss.item() * ACCUM_STEPS
        pbar.set_postfix(loss=f"{loss.item() * ACCUM_STEPS:.4f}")

    return total_loss / len(loader)

@torch.no_grad()
def validate(model, loader):
    model.eval()
    dices = []

    for imgs, masks in loader:
        imgs = imgs.to(DEVICE)
        masks = masks.to(DEVICE).unsqueeze(1)

        logits = model(imgs)
        probs = torch.sigmoid(logits)
        dices.append(dice_soft(probs, masks).item())

    return float(np.mean(dices))

# ===============================
# Training Wrapper
# ===============================
def train_model(name, model):
    model = model.to(DEVICE)

    optimizer = AdamW(
        model.parameters(),
        lr=LR,
        weight_decay=WEIGHT_DECAY
    )

    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=EPOCHS
    )

    scaler = GradScaler()
    best_dice = 0.0
    ckpt_path = f"{name}_best.pt"

    for epoch in range(1, EPOCHS + 1):
        tr_loss = train_one_epoch(model, train_loader, optimizer, scaler, epoch, name)
        val_dice = validate(model, val_loader)
        scheduler.step()

        print(
            f"{name} | Epoch {epoch:02d} | "
            f"TrainLoss {tr_loss:.4f} | ValDice {val_dice:.4f}"
        )

        if val_dice > best_dice:
            best_dice = val_dice
            torch.save(model.state_dict(), ckpt_path)
            print(f">> Best {name} saved")

    print(f"[DONE] {name} best Val Dice: {best_dice:.4f}")
    return ckpt_path, best_dice

# ===============================
# RUN
# ===============================
print("\n===== TRAINING UNET++ =====")
unetpp_ckpt, unetpp_dice = train_model("unetpp", build_unetpp())

print("\n===== TRAINING DEEPLAB =====")
deeplab_ckpt, deeplab_dice = train_model("deeplab", build_deeplab())

print("\n[STAGE 3 COMPLETE]")
print(f"UNet++  best Dice: {unetpp_dice:.4f}")
print(f"DeepLab best Dice: {deeplab_dice:.4f}")
print("→ Ready for STAGE 4")



Device: cuda
[INFO] Train: 423 | Val: 75

===== TRAINING UNET++ =====
Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b4-6ed6700e.pth


100%|██████████| 74.4M/74.4M [00:00<00:00, 191MB/s]
                                                                                

unetpp | Epoch 01 | TrainLoss 0.7037 | ValDice 0.4221
>> Best unetpp saved


                                                                                

unetpp | Epoch 02 | TrainLoss 0.5645 | ValDice 0.4587
>> Best unetpp saved


                                                                                

unetpp | Epoch 03 | TrainLoss 0.4801 | ValDice 0.5163
>> Best unetpp saved


                                                                                

unetpp | Epoch 04 | TrainLoss 0.4088 | ValDice 0.5319
>> Best unetpp saved


                                                                                

unetpp | Epoch 05 | TrainLoss 0.3835 | ValDice 0.5769
>> Best unetpp saved


                                                                                

unetpp | Epoch 06 | TrainLoss 0.3461 | ValDice 0.5636


                                                                                

unetpp | Epoch 07 | TrainLoss 0.3213 | ValDice 0.6027
>> Best unetpp saved


                                                                                

unetpp | Epoch 08 | TrainLoss 0.3074 | ValDice 0.5702


                                                                                

unetpp | Epoch 09 | TrainLoss 0.2902 | ValDice 0.5934


                                                                                 

unetpp | Epoch 10 | TrainLoss 0.3003 | ValDice 0.6206
>> Best unetpp saved


                                                                                 

unetpp | Epoch 11 | TrainLoss 0.2844 | ValDice 0.6090


                                                                                 

unetpp | Epoch 12 | TrainLoss 0.2612 | ValDice 0.6047


                                                                                 

unetpp | Epoch 13 | TrainLoss 0.2560 | ValDice 0.6047


                                                                                 

unetpp | Epoch 14 | TrainLoss 0.2564 | ValDice 0.5871


                                                                                 

unetpp | Epoch 15 | TrainLoss 0.2451 | ValDice 0.5824


                                                                                 

unetpp | Epoch 16 | TrainLoss 0.2296 | ValDice 0.6010


                                                                                 

unetpp | Epoch 17 | TrainLoss 0.2123 | ValDice 0.6431
>> Best unetpp saved


                                                                                 

unetpp | Epoch 18 | TrainLoss 0.2350 | ValDice 0.5982


                                                                                 

unetpp | Epoch 19 | TrainLoss 0.2264 | ValDice 0.6017


                                                                                 

unetpp | Epoch 20 | TrainLoss 0.2183 | ValDice 0.6056


                                                                                 

unetpp | Epoch 21 | TrainLoss 0.1965 | ValDice 0.6141


                                                                                 

unetpp | Epoch 22 | TrainLoss 0.2001 | ValDice 0.6383


                                                                                 

unetpp | Epoch 23 | TrainLoss 0.1935 | ValDice 0.6078


                                                                                 

unetpp | Epoch 24 | TrainLoss 0.1956 | ValDice 0.6612
>> Best unetpp saved


                                                                                 

unetpp | Epoch 25 | TrainLoss 0.1861 | ValDice 0.6438


                                                                                 

unetpp | Epoch 26 | TrainLoss 0.2176 | ValDice 0.6220


                                                                                 

unetpp | Epoch 27 | TrainLoss 0.1810 | ValDice 0.6260


                                                                                 

unetpp | Epoch 28 | TrainLoss 0.1988 | ValDice 0.6324


                                                                                 

unetpp | Epoch 29 | TrainLoss 0.1971 | ValDice 0.6443


                                                                                 

unetpp | Epoch 30 | TrainLoss 0.2004 | ValDice 0.6548
[DONE] unetpp best Val Dice: 0.6612

===== TRAINING DEEPLAB =====
Downloading: "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth" to /root/.cache/torch/hub/checkpoints/resnet101-5d3b4d8f.pth


100%|██████████| 170M/170M [00:00<00:00, 306MB/s]
                                                                                 

deeplab | Epoch 01 | TrainLoss 0.6302 | ValDice 0.3676
>> Best deeplab saved


                                                                                 

deeplab | Epoch 02 | TrainLoss 0.5522 | ValDice 0.4152
>> Best deeplab saved


                                                                                 

deeplab | Epoch 03 | TrainLoss 0.5042 | ValDice 0.4440
>> Best deeplab saved


                                                                                 

deeplab | Epoch 04 | TrainLoss 0.4760 | ValDice 0.4775
>> Best deeplab saved


                                                                                 

deeplab | Epoch 05 | TrainLoss 0.4290 | ValDice 0.4984
>> Best deeplab saved


                                                                                 

deeplab | Epoch 06 | TrainLoss 0.4293 | ValDice 0.5471
>> Best deeplab saved


                                                                                 

deeplab | Epoch 07 | TrainLoss 0.4205 | ValDice 0.5609
>> Best deeplab saved


                                                                                 

deeplab | Epoch 08 | TrainLoss 0.3928 | ValDice 0.5077


                                                                                 

deeplab | Epoch 09 | TrainLoss 0.3920 | ValDice 0.5760
>> Best deeplab saved


                                                                                  

deeplab | Epoch 10 | TrainLoss 0.3749 | ValDice 0.5482


                                                                                  

deeplab | Epoch 11 | TrainLoss 0.3652 | ValDice 0.5166


                                                                                  

deeplab | Epoch 12 | TrainLoss 0.3232 | ValDice 0.5495


                                                                                  

deeplab | Epoch 13 | TrainLoss 0.3086 | ValDice 0.5870
>> Best deeplab saved


                                                                                  

deeplab | Epoch 14 | TrainLoss 0.3324 | ValDice 0.5449


                                                                                  

deeplab | Epoch 15 | TrainLoss 0.3158 | ValDice 0.5831


                                                                                  

deeplab | Epoch 16 | TrainLoss 0.2789 | ValDice 0.5729


                                                                                  

deeplab | Epoch 17 | TrainLoss 0.3004 | ValDice 0.5544


                                                                                  

deeplab | Epoch 18 | TrainLoss 0.2974 | ValDice 0.5642


                                                                                  

deeplab | Epoch 19 | TrainLoss 0.2718 | ValDice 0.5689


                                                                                  

deeplab | Epoch 20 | TrainLoss 0.2842 | ValDice 0.5976
>> Best deeplab saved


                                                                                  

deeplab | Epoch 21 | TrainLoss 0.2763 | ValDice 0.6049
>> Best deeplab saved


                                                                                  

deeplab | Epoch 22 | TrainLoss 0.2494 | ValDice 0.5625


                                                                                  

deeplab | Epoch 23 | TrainLoss 0.2475 | ValDice 0.5708


                                                                                  

deeplab | Epoch 24 | TrainLoss 0.2531 | ValDice 0.5537


                                                                                  

deeplab | Epoch 25 | TrainLoss 0.2251 | ValDice 0.5821


                                                                                  

deeplab | Epoch 26 | TrainLoss 0.2297 | ValDice 0.5846


                                                                                  

deeplab | Epoch 27 | TrainLoss 0.2427 | ValDice 0.5579


                                                                                  

deeplab | Epoch 28 | TrainLoss 0.2331 | ValDice 0.5711


                                                                                  

deeplab | Epoch 29 | TrainLoss 0.2500 | ValDice 0.6247
>> Best deeplab saved


                                                                                  

deeplab | Epoch 30 | TrainLoss 0.2081 | ValDice 0.5897
[DONE] deeplab best Val Dice: 0.6247

[STAGE 3 COMPLETE]
UNet++  best Dice: 0.6612
DeepLab best Dice: 0.6247
→ Ready for STAGE 4


# Optimization, Validation & Refinement

In [5]:
# ============================================================
# STAGE 4 — OPTIMIZATION, VALIDATION & REFINEMENT (UPGRADED)
# LB-STABLE SELECTION
# ============================================================

import numpy as np
from tqdm import tqdm
import cv2
import torch
import segmentation_models_pytorch as smp

# ===============================
# Load BEST model (UNet++)
# ===============================
def load_unetpp():
    model = smp.UnetPlusPlus(
        encoder_name="efficientnet-b4",
        encoder_weights=None,
        in_channels=3,
        classes=1,
    )
    model.load_state_dict(torch.load("unetpp_best.pt", map_location=DEVICE))
    model.to(DEVICE)
    model.eval()
    return model

model = load_unetpp()
print("[INFO] UNet++ best model loaded")

# ===============================
# Dice metrics
# ===============================
@torch.no_grad()
def dice_soft(probs, targets, eps=1e-7):
    inter = (probs * targets).sum()
    union = probs.sum() + targets.sum()
    return (2 * inter + eps) / (union + eps)

@torch.no_grad()
def dice_hard(preds, targets, eps=1e-7):
    inter = (preds * targets).sum()
    union = preds.sum() + targets.sum()
    return (2 * inter + eps) / (union + eps)

# ===============================
# Post-process (SAME AS STAGE 5)
# ===============================
def post_process(prob, thr, min_area):
    mask = (prob > thr).astype(np.uint8)

    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask, 8)
    out = np.zeros_like(mask)

    for i in range(1, num_labels):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            out[labels == i] = 1

    return out

# ===============================
# Sweep space (FOCUSED)
# ===============================
thresholds = np.arange(0.24, 0.36, 0.02)
min_areas  = [20, 30, 40, 60]

records = []

print("\n[OPTIMIZATION] LB-stable sweep...")

val_dataset = PotholeDataset(
    val_imgs_tr,
    val_msks_tr,
    transforms=val_tfms
)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False
)

for thr in thresholds:
    for min_area in min_areas:
        dices_h = []
        dices_s = []
        empty_cnt = []
        comp_counts = []

        for img, mask in val_loader:
            img = img.to(DEVICE)
            gt = mask.numpy()[0]

            with torch.no_grad():
                prob = torch.sigmoid(model(img)).cpu().numpy()[0, 0]

            pred = post_process(prob, thr, min_area)

            dices_h.append(dice_hard(pred, gt).item())
            dices_s.append(dice_soft(prob, gt).item())

            empty_cnt.append(pred.sum() == 0)
            comp_counts.append(
                cv2.connectedComponents(pred.astype(np.uint8))[0] - 1
            )

        records.append({
            "thr": thr,
            "min_area": min_area,
            "dice_hard": np.mean(dices_h),
            "dice_soft": np.mean(dices_s),
            "empty_ratio": np.mean(empty_cnt),
            "mean_components": np.mean(comp_counts)
        })

        print(
            f"thr={thr:.2f} | area={min_area:3d} | "
            f"DiceH={np.mean(dices_h):.4f} | "
            f"DiceS={np.mean(dices_s):.4f} | "
            f"Empty={np.mean(empty_cnt):.2%}"
        )

# ===============================
# SELECT LB-STABLE CONFIG (SAFE)
# ===============================

# convert to list of dicts (already is)
assert len(records) > 0, "No sweep records generated"

best_soft = max(r["dice_soft"] for r in records)
best_hard = max(r["dice_hard"] for r in records)

# ---------- Tier 1: ideal ----------
candidates = [
    r for r in records
    if r["dice_soft"] >= 0.97 * best_soft
    and r["empty_ratio"] < 0.15
]

# ---------- Tier 2: relax empty constraint ----------
if len(candidates) == 0:
    candidates = [
        r for r in records
        if r["dice_soft"] >= 0.95 * best_soft
        and r["empty_ratio"] < 0.25
    ]

# ---------- Tier 3: fallback to hard dice ----------
if len(candidates) == 0:
    candidates = [
        r for r in records
        if r["dice_hard"] >= 0.98 * best_hard
    ]

# ---------- Final safety ----------
assert len(candidates) > 0, "STAGE 4 FAILED: no valid threshold candidates"

def score_fn(r):
    # prefer hard dice, penalize over-fragmentation
    return (
        r["dice_hard"]
        - 0.015 * abs(r["mean_components"] - 2.5)
        - 0.10 * r["empty_ratio"]
    )

best_cfg = max(candidates, key=score_fn)

OPT_CONFIG = {
    "thr": float(best_cfg["thr"]),
    "min_area": int(best_cfg["min_area"]),
}

print("\n[BEST LB-STABLE CONFIG — SAFE]")
print(f"Threshold : {best_cfg['thr']:.2f}")
print(f"Min area  : {best_cfg['min_area']}")
print(f"DiceHard : {best_cfg['dice_hard']:.4f}")
print(f"DiceSoft : {best_cfg['dice_soft']:.4f}")
print(f"Empty %  : {best_cfg['empty_ratio']:.2%}")
print(f"Mean comp: {best_cfg['mean_components']:.2f}")

[INFO] UNet++ best model loaded

[OPTIMIZATION] LB-stable sweep...
thr=0.24 | area= 20 | DiceH=0.8411 | DiceS=0.6100 | Empty=25.33%
thr=0.24 | area= 30 | DiceH=0.7623 | DiceS=0.6245 | Empty=18.67%
thr=0.24 | area= 40 | DiceH=0.7581 | DiceS=0.5939 | Empty=22.67%
thr=0.24 | area= 60 | DiceH=0.7847 | DiceS=0.6206 | Empty=21.33%
thr=0.26 | area= 20 | DiceH=0.7750 | DiceS=0.6356 | Empty=18.67%
thr=0.26 | area= 30 | DiceH=0.7885 | DiceS=0.6509 | Empty=17.33%
thr=0.26 | area= 40 | DiceH=0.7732 | DiceS=0.6612 | Empty=16.00%
thr=0.26 | area= 60 | DiceH=0.7644 | DiceS=0.6508 | Empty=17.33%
thr=0.28 | area= 20 | DiceH=0.7991 | DiceS=0.6467 | Empty=20.00%
thr=0.28 | area= 30 | DiceH=0.8021 | DiceS=0.6111 | Empty=21.33%
thr=0.28 | area= 40 | DiceH=0.7432 | DiceS=0.6054 | Empty=18.67%
thr=0.28 | area= 60 | DiceH=0.7761 | DiceS=0.6374 | Empty=24.00%
thr=0.30 | area= 20 | DiceH=0.8103 | DiceS=0.6454 | Empty=18.67%
thr=0.30 | area= 30 | DiceH=0.8159 | DiceS=0.6251 | Empty=21.33%
thr=0.30 | area= 40 | D

# Inference, Encoding & Submission

In [6]:
# ============================================================
# STAGE 5 — STRUCTURAL ENSEMBLE INFERENCE & SUBMISSION
# FINAL SAFE VERSION (ANTI-NAMESPACE COLLISION)
# ============================================================

import numpy as np
import pandas as _pd   # <<< ALIAS AMAN
import torch
import cv2
from pathlib import Path
from tqdm import tqdm
import segmentation_models_pytorch as smp

# -----------------------------
# CONFIG
# -----------------------------
DATA_ROOT = Path("/kaggle/input/data-science-ara-7-0/dataset/dataset")
TEST_IMG_DIR = DATA_ROOT / "test" / "images"
OUT_SUB = "/kaggle/working/submission.csv"

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# FROM STAGE 4 (LB-STABLE)
THR_U = 0.26
THR_D = 0.32
MIN_AREA = 60

INPUT_SIZE = 512

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD  = (0.229, 0.224, 0.225)

print("[CONFIG]")
print("thr_unetpp :", THR_U)
print("thr_deeplab:", THR_D)
print("min_area  :", MIN_AREA)

# -----------------------------
# LOAD MODELS
# -----------------------------
unetpp = smp.UnetPlusPlus(
    encoder_name="efficientnet-b4",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

deeplab = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=1,
).to(DEVICE)

unetpp.load_state_dict(
    torch.load("/kaggle/working/unetpp_best.pt", map_location=DEVICE)
)
deeplab.load_state_dict(
    torch.load("/kaggle/working/deeplab_best.pt", map_location=DEVICE)
)

unetpp.eval()
deeplab.eval()

print("[INFO] Models loaded")

# -----------------------------
# RLE ENCODER (OFFICIAL)
# -----------------------------
def encode_rle(mask: np.ndarray) -> str:
    binary = (mask == 1).astype(np.uint8)
    pixels = binary.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[0::2]
    return " ".join(str(x) for x in runs)

# -----------------------------
# SMALL OBJECT FILTER
# -----------------------------
def remove_small_objects(mask, min_area):
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        mask.astype(np.uint8), connectivity=8
    )
    clean = np.zeros_like(mask, dtype=np.uint8)
    for i in range(1, num_labels):
        if stats[i, cv2.CC_STAT_AREA] >= min_area:
            clean[labels == i] = 1
    return clean

# -----------------------------
# LOAD TEST FILES
# -----------------------------
test_images = sorted(TEST_IMG_DIR.glob("*.jpg"))
print("[INFO] Test images:", len(test_images))
assert len(test_images) == 295, "TEST IMAGE COUNT HARUS 295"

# -----------------------------
# INFERENCE + H-FLIP TTA
# -----------------------------
records = []

with torch.no_grad():
    for img_path in tqdm(test_images, desc="Structural Ensemble Inference"):
        img_name = img_path.name

        img = cv2.imread(str(img_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h0, w0 = img.shape[:2]

        img_r = cv2.resize(img, (INPUT_SIZE, INPUT_SIZE)).astype(np.float32) / 255.0
        for c in range(3):
            img_r[..., c] = (img_r[..., c] - IMAGENET_MEAN[c]) / IMAGENET_STD[c]

        x = torch.from_numpy(img_r.transpose(2, 0, 1)).unsqueeze(0).to(DEVICE)
        x_flip = torch.flip(x, dims=[3])

        pu = torch.sigmoid(unetpp(x))
        pd = torch.sigmoid(deeplab(x))

        pu_f = torch.flip(torch.sigmoid(unetpp(x_flip)), dims=[3])
        pd_f = torch.flip(torch.sigmoid(deeplab(x_flip)), dims=[3])

        pu = ((pu + pu_f) / 2.0)[0, 0].cpu().numpy()
        pd = ((pd + pd_f) / 2.0)[0, 0].cpu().numpy()

        mask_u = (pu > THR_U).astype(np.uint8)

        if mask_u.sum() == 0:
            pred = np.zeros((h0, w0), dtype=np.uint8)
        else:
            mask_d = (pd > THR_D).astype(np.uint8)
            mask_d = remove_small_objects(mask_d, MIN_AREA)

            if mask_d.sum() == 0:
                pred = cv2.resize(mask_u, (w0, h0), interpolation=cv2.INTER_NEAREST)
            else:
                mask_d = cv2.dilate(mask_d, np.ones((3, 3), np.uint8))
                pred = mask_u & mask_d
                pred = cv2.resize(pred, (w0, h0), interpolation=cv2.INTER_NEAREST)

        rle = "" if pred.sum() == 0 else encode_rle(pred)

        records.append({
            "ImageId": img_name,
            "rle": rle
        })

# -----------------------------
# SAVE SUBMISSION (SAFE)
# -----------------------------
df_sub = _pd.DataFrame(records, columns=["ImageId", "rle"])
df_sub.to_csv(OUT_SUB, index=False)

print("\n[STAGE 5 COMPLETE — SUBMISSION READY]")
print("Saved to:", OUT_SUB)
print("Rows:", len(df_sub))
print("Empty RLE:", (df_sub["rle"] == "").sum())


[CONFIG]
thr_unetpp : 0.26
thr_deeplab: 0.32
min_area  : 60
[INFO] Models loaded
[INFO] Test images: 295


Structural Ensemble Inference: 100%|██████████| 295/295 [00:59<00:00,  4.97it/s]



[STAGE 5 COMPLETE — SUBMISSION READY]
Saved to: /kaggle/working/submission.csv
Rows: 295
Empty RLE: 4
