In [1]:
# ============================================================
# ðŸ”Ž Mine crack tiles from KolektorSDD (fine masks) â†’ ImageFolder
#   - expects: KolektorSDD/kos01..kos50/Part{i}.jpg + Part{i}_label.bmp
#   - writes:  Data/NEU-DET/train/images/cracks/*.png
#              Data/NEU-DET/validation/images/cracks/*.png
# ============================================================

from pathlib import Path
from PIL import Image
import numpy as np
import torchvision.transforms.functional as TF

# ---- Paths (adjust if needed)
KO_ROOT   = Path("Data/KolektorSDD")                   # where kos01..kos50 live
OUT_TRAIN = Path("Data/NEU-DET/train/images/cracks")   # your existing layout
OUT_VAL   = Path("Data/NEU-DET/validation/images/cracks")

OUT_TRAIN.mkdir(parents=True, exist_ok=True)
OUT_VAL.mkdir(parents=True, exist_ok=True)

# ---- Tiling + selection settings
TILE      = 128     # output tile size to match your IMG_SIZE
STRIDE    = 96      # overlap (smaller stride -> more tiles)
POS_FRAC  = 0.01    # >=1% mask pixels inside tile => positive
NEG_RATE  = 0.05    # random sample fraction of empty tiles as negatives
VAL_SPLIT = 0.20    # per-image split for pos/neg tiles

rng = np.random.default_rng(42)

def _iter_kolektor_pairs(root: Path):
    """Yield (image_path, mask_path) for all kosXX/Part*.jpg that have *_label.bmp."""
    for kos in sorted(root.glob("kos*")):
        if not kos.is_dir(): 
            continue
        for img_path in sorted(kos.glob("Part*.jpg")):
            mask_path = img_path.with_name(img_path.stem + "_label.bmp")
            if mask_path.exists():
                yield img_path, mask_path

def _mask_fraction(mask_tensor):
    # mask_tensor: [1,H,W] in {0,1}
    return float(mask_tensor.mean().item())

def _save_tile(im: Image.Image, path: Path):
    # convert to grayscale tile of size TILEÃ—TILE
    im = im.convert("L").resize((TILE, TILE), Image.BILINEAR)
    im.save(path)

pos_tiles, neg_tiles = [], []
pairs = list(_iter_kolektor_pairs(KO_ROOT))
print(f"Found {len(pairs)} Kolektor image/mask pairs.")

# Collect tiles
for img_path, msk_path in pairs:
    I = Image.open(img_path).convert("L")
    M = Image.open(msk_path).convert("L")  # 0 background, 255 crack
    W, H = I.size

    I_t = TF.to_tensor(I)                  # [1,H,W] in [0,1]
    M_t = (TF.to_tensor(M) > 0.5).float()  # binary [1,H,W]

    # Slide a window; pick crack tiles + a sparse set of empty negatives
    for y in range(0, H - TILE + 1, STRIDE):
        for x in range(0, W - TILE + 1, STRIDE):
            m = M_t[:, y:y+TILE, x:x+TILE]
            frac = _mask_fraction(m)
            if frac >= POS_FRAC:
                pos_tiles.append((img_path, (x, y, x+TILE, y+TILE)))
            elif frac == 0.0 and rng.random() < NEG_RATE:
                neg_tiles.append((img_path, (x, y, x+TILE, y+TILE)))

print(f"Raw tiles â†’ pos={len(pos_tiles)} neg={len(neg_tiles)}")

# Balance negatives to positives
if len(neg_tiles) > len(pos_tiles):
    neg_tiles = list(rng.choice(neg_tiles, size=len(pos_tiles), replace=False))
print(f"Balanced neg={len(neg_tiles)} to match pos={len(pos_tiles)}")

# Split per-tile into train/val
n_pos_val = int(len(pos_tiles) * VAL_SPLIT)
n_neg_val = int(len(neg_tiles) * VAL_SPLIT)
val_idx_p = set(rng.choice(len(pos_tiles), size=n_pos_val, replace=False))
val_idx_n = set(rng.choice(len(neg_tiles), size=n_neg_val, replace=False))

pos_tr = [t for i,t in enumerate(pos_tiles) if i not in val_idx_p]
pos_va = [t for i,t in enumerate(pos_tiles) if i in val_idx_p]
neg_tr = [t for i,t in enumerate(neg_tiles) if i not in val_idx_n]
neg_va = [t for i,t in enumerate(neg_tiles) if i in val_idx_n]

def _write_tiles(tiles, out_dir, prefix):
    out_dir.mkdir(parents=True, exist_ok=True)
    for i, (ipath, (x1,y1,x2,y2)) in enumerate(tiles):
        tile_img = Image.open(ipath).convert("L").crop((x1,y1,x2,y2))
        _save_tile(tile_img, out_dir / f"{prefix}_{ipath.parent.name}_{ipath.stem}_{i:06d}.png")

_write_tiles(pos_tr, OUT_TRAIN, "crack_pos")
_write_tiles(neg_tr, OUT_TRAIN, "crack_neg")
_write_tiles(pos_va, OUT_VAL,   "crack_pos")
_write_tiles(neg_va, OUT_VAL,   "crack_neg")

print(f"âœ… crack tiles saved â†’ train={len(pos_tr)+len(neg_tr)}  val={len(pos_va)+len(neg_va)}")


Found 399 Kolektor image/mask pairs.
Raw tiles â†’ pos=179 neg=934


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 2 dimensions. The detected shape was (934, 2) + inhomogeneous part.