In [1]:
# Kaggle: install dependencies
!pip install -q "transformers>=4.41" albumentations opencv-python-headless timm


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m89.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m50.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m0:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
# CLIPSeg fine-tuning on /kaggle/input/taping-cracks (cracks + taping)
# Outputs to /kaggle/working/ckpts_clipseg

import os, glob, random, json
from pathlib import Path
import numpy as np, cv2
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset

import albumentations as A
from transformers import (
    CLIPSegProcessor,
    CLIPSegForImageSegmentation,
    get_cosine_schedule_with_warmup,
)

# =========================
# CONFIG
# =========================
BASE_CANDIDATES = [
    "/kaggle/input/taping-cracks",
    "/kaggle/input/taping-cracks/data copy",
    "/kaggle/input/taping-cracks/data_copy",
]
SAVE_DIR  = Path("/kaggle/working/ckpts_clipseg")
IMG_SIZE  = 512
BATCH     = 6
EPOCHS    = 15
LR        = 5e-5
SEED      = 42
WORKERS   = 2
WEIGHT_DECAY = 1e-4
WARMUP_FRAC  = 0.05
CLIP_GRAD_NORM = 1.0

CRACK_PROMPTS_TRAIN  = ["segment crack", "segment wall crack"]
CRACK_PROMPTS_VAL    = ["segment crack"]
TAPING_PROMPTS_TRAIN = ["segment taping area", "segment drywall seam", "segment joint/tape"]
TAPING_PROMPTS_VAL   = ["segment taping area"]

# =========================
# Utils
# =========================
def set_seed(s=SEED):
    random.seed(s); np.random.seed(s)
    torch.manual_seed(s); torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.benchmark=False; torch.backends.cudnn.deterministic=True

def find_data_root(candidates):
    for base in candidates:
        root = Path(base)
        if (root/"cracks").exists() and (root/"taping").exists():
            return root
        if root.exists():
            for sub in root.iterdir():
                if sub.is_dir() and (sub/"cracks").exists() and (sub/"taping").exists():
                    return sub
    raise FileNotFoundError("Dataset not found in: " + ", ".join(candidates))

# ---- FIXED AUGMENTATIONS ----
def build_aug(img_size: int, for_cracks: bool):
    """
    Keep geometry light for cracks (thin), a bit stronger for taping.
    Use only Albumentations transforms that are broadly available on Kaggle.
    """
    common_photo = [
        A.RandomBrightnessContrast(0.2, 0.2, p=0.7),
        A.RandomGamma(gamma_limit=(90, 110), p=0.5),   # widely available
        A.GaussianBlur(blur_limit=(3, 3), p=0.2),      # mild blur instead of GaussNoise
    ]
    if for_cracks:
        return A.Compose([
            A.HorizontalFlip(p=0.5),
            A.Rotate(limit=5, border_mode=cv2.BORDER_CONSTANT, p=0.4),
            A.Affine(scale=(0.95, 1.05), translate_percent=0.04, p=0.4),
            *common_photo,
            A.Resize(img_size, img_size, interpolation=cv2.INTER_LINEAR),
        ])
    else:
        return A.Compose([
            A.HorizontalFlip(p=0.5),
            A.Rotate(limit=10, border_mode=cv2.BORDER_CONSTANT, p=0.5),
            A.Affine(scale=(0.9, 1.1), translate_percent=0.05, p=0.5),
            *common_photo,
            A.Resize(img_size, img_size, interpolation=cv2.INTER_LINEAR),
        ])


class PromptedSegDataset(Dataset):
    """ split_dir: .../cracks/train or .../taping/val """
    def __init__(self, split_dir, prompts, img_size, for_cracks, augment, processor):
        self.root = Path(split_dir)
        self.img_dir = self.root/"images"
        self.mask_dir= self.root/"masks"
        self.prompts = prompts
        self.processor = processor
        self.tf = build_aug(img_size, for_cracks) if augment else A.Compose([A.Resize(img_size, img_size, interpolation=cv2.INTER_LINEAR)])
        self.items=[]
        img_paths = sorted(glob.glob(str(self.img_dir/"*")))
        if not img_paths: raise RuntimeError(f"No images in {self.img_dir}")
        for ip in img_paths:
            base = Path(ip).stem
            mp=None
            for ext in (".png",".jpg",".jpeg"):
                cand = self.mask_dir/f"{base}{ext}"
                if cand.exists(): mp=str(cand); break
            self.items.append((ip, mp))
    def __len__(self): return len(self.items)
    def __getitem__(self, i):
        ip, mp = self.items[i]
        img = cv2.cvtColor(cv2.imread(ip, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
        if mp and os.path.exists(mp):
            mask = cv2.imread(mp, cv2.IMREAD_GRAYSCALE)
        else:
            mask = np.zeros(img.shape[:2], np.uint8)
        mask = (mask>127).astype(np.uint8)*255
        out = self.tf(image=img, mask=mask); img, mask = out["image"], out["mask"]
        prompt = random.choice(self.prompts)
        proc = self.processor(text=[prompt], images=[Image.fromarray(img)], padding="max_length", return_tensors="pt")
        return {
            "pixel_values": proc.pixel_values[0],
            "input_ids": proc.input_ids[0],
            "attention_mask": proc.attention_mask[0],
            "mask": torch.from_numpy((mask>127).astype(np.float32)),
            "prompt": prompt, "path": ip,
        }

def dice_coeff(prob, tgt, eps=1e-6):
    if prob.ndim==4: prob=prob[:,0]
    pred=(prob>0.5).float()
    inter=(pred*tgt).sum(dim=[1,2]); union=pred.sum(dim=[1,2])+tgt.sum(dim=[1,2])
    return ((2*inter+eps)/(union+eps)).mean().item()

def miou(prob, tgt, eps=1e-6):
    if prob.ndim==4: prob=prob[:,0]
    pred=(prob>0.5).float()
    inter=(pred*tgt).sum(dim=[1,2]); union=pred.sum(dim=[1,2])+tgt.sum(dim=[1,2])-inter
    return ((inter+eps)/(union+eps)).mean().item()

class BCEDice(torch.nn.Module):
    def __init__(self, w=0.5):
        super().__init__(); self.w=w; self.bce=torch.nn.BCEWithLogitsLoss()
    def forward(self, logits, tgt):
        bce=self.bce(logits.squeeze(1), tgt)
        prob=torch.sigmoid(logits).squeeze(1)
        inter=(prob*tgt).sum(dim=[1,2]); union=prob.sum(dim=[1,2])+tgt.sum(dim=[1,2])
        dice=1-(2*inter+1e-6)/(union+1e-6)
        return self.w*bce+(1-self.w)*dice.mean()

# =========================
# Train
# =========================
set_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

DATA_ROOT = find_data_root(BASE_CANDIDATES)
print("Using DATA_ROOT:", DATA_ROOT)

processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined").to(device)

ds_tr = ConcatDataset([
    PromptedSegDataset(str(DATA_ROOT/"cracks"/"train"), CRACK_PROMPTS_TRAIN,  IMG_SIZE, True,  True,  processor),
    PromptedSegDataset(str(DATA_ROOT/"taping"/"train"), TAPING_PROMPTS_TRAIN, IMG_SIZE, False, True,  processor),
])
ds_va_cr = PromptedSegDataset(str(DATA_ROOT/"cracks"/"val"),  CRACK_PROMPTS_VAL,  IMG_SIZE, True,  False, processor)
ds_va_tp = PromptedSegDataset(str(DATA_ROOT/"taping"/"val"), TAPING_PROMPTS_VAL, IMG_SIZE, False, False, processor)
ds_va = ConcatDataset([ds_va_cr, ds_va_tp])

dl_tr = DataLoader(ds_tr, batch_size=BATCH, shuffle=True,  num_workers=WORKERS, pin_memory=True, drop_last=True)
dl_va = DataLoader(ds_va, batch_size=BATCH, shuffle=False, num_workers=WORKERS, pin_memory=True)

opt = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
steps = EPOCHS * len(dl_tr); warm = int(WARMUP_FRAC * steps)
sched = get_cosine_schedule_with_warmup(opt, num_warmup_steps=warm, num_training_steps=steps)
loss_fn = BCEDice(0.5)
use_amp = torch.cuda.is_available()
scaler = torch.amp.GradScaler(device="cuda") if use_amp else None

best_dice, best_path = -1.0, None
for epoch in range(1, EPOCHS+1):
    model.train()
    running = 0.0
    pbar = tqdm(dl_tr, total=len(dl_tr), desc=f"Epoch {epoch}/{EPOCHS}")

    for step, b in enumerate(pbar, 1):   # start at 1 to avoid div by zero
        px   = b["pixel_values"].to(device, non_blocking=True)
        ids  = b["input_ids"].to(device, non_blocking=True)
        attn = b["attention_mask"].to(device, non_blocking=True)
        gt   = b["mask"].to(device, non_blocking=True)

        opt.zero_grad(set_to_none=True)
        with torch.amp.autocast(device_type="cuda", enabled=use_amp):
            out = model(pixel_values=px, input_ids=ids, attention_mask=attn)
            logits = out.logits
            gt_rs  = F.interpolate(gt.unsqueeze(1), size=logits.shape[-2:], mode="nearest").squeeze(1)
            loss   = loss_fn(logits, gt_rs)

        if use_amp:
            scaler.scale(loss).backward()
            if CLIP_GRAD_NORM:
                scaler.unscale_(opt)
                torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRAD_NORM)
            scaler.step(opt); scaler.update()
        else:
            loss.backward()
            if CLIP_GRAD_NORM:
                torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRAD_NORM)
            opt.step()

        sched.step()
        running += loss.item()
        pbar.set_postfix(avg_loss=f"{running/step:.4f}")  # <- uses step, not pbar.n

    # ------- validation unchanged -------
    model.eval(); dices=[]; ious=[]
    with torch.no_grad():
        for b in tqdm(dl_va, leave=False, desc="Valid"):
            px   = b["pixel_values"].to(device)
            ids  = b["input_ids"].to(device)
            attn = b["attention_mask"].to(device)
            gt   = b["mask"].to(device)
            out  = model(pixel_values=px, input_ids=ids, attention_mask=attn)
            logits = out.logits
            gt_rs  = F.interpolate(gt.unsqueeze(1), size=logits.shape[-2:], mode="nearest").squeeze(1)
            prob   = torch.sigmoid(logits)
            dices.append(dice_coeff(prob, gt_rs))
            ious.append(miou(prob, gt_rs))
    d, m = float(np.mean(dices)), float(np.mean(ious))
    print(f"[Epoch {epoch}] Val Dice={d:.4f}  mIoU={m:.4f}")

    if d > best_dice:
        best_dice = d
        best_path = SAVE_DIR/f"clipseg_best_e{epoch}_dice{d:.4f}.pt"
        torch.save({"model": model.state_dict(), "epoch": epoch, "dice": d, "miou": m}, best_path)
        print("Saved:", best_path)

final_path = SAVE_DIR/"clipseg_final.pt"
torch.save({"model": model.state_dict()}, final_path)
print("Final:", final_path, "| Best:", best_path)

with open(SAVE_DIR/"inference_helper.py", "w") as f:
    f.write(f"""import torch, cv2
from PIL import Image
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
ckpt = r"{str(best_path if best_path else final_path)}"
sd = torch.load(ckpt, map_location="cpu"); sd = sd.get("model", sd)
model.load_state_dict(sd); model.to(device); model.eval()
def predict_mask(img_path, prompt, out_png, thr=0.5):
    inp = processor(text=[prompt], images=[Image.open(img_path).convert("RGB")], padding="max_length", return_tensors="pt")
    out = model(pixel_values=inp["pixel_values"].to(device),
                input_ids=inp["input_ids"].to(device),
                attention_mask=inp["attention_mask"].to(device))
    prob = torch.sigmoid(out.logits)[0,0].cpu().numpy()
    cv2.imwrite(out_png, (prob>thr).astype("uint8")*255)
# Example:
# predict_mask("/kaggle/input/taping-cracks/taping/val/images/ANY.jpg",
#              "segment taping area",
#              "/kaggle/working/ANY__segment_taping_area.png")
""")
print("Helper written:", SAVE_DIR/"inference_helper.py")


Using DATA_ROOT: /kaggle/input/taping-cracks/data copy


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 1/15: 100%|██████████| 2932/2932 [06:45<00:00,  7.23it/s, avg_loss=0.3730]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 1] Val Dice=0.5670  mIoU=0.4179
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e1_dice0.5670.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 2/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.82it/s, avg_loss=0.3026]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 2] Val Dice=0.6219  mIoU=0.4707
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e2_dice0.6219.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 3/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.82it/s, avg_loss=0.2871]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 3] Val Dice=0.6413  mIoU=0.4921
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e3_dice0.6413.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 4/15: 100%|██████████| 2932/2932 [05:31<00:00,  8.84it/s, avg_loss=0.2776]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 4] Val Dice=0.6503  mIoU=0.5018
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e4_dice0.6503.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 5/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.81it/s, avg_loss=0.2706]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 5] Val Dice=0.6510  mIoU=0.5023
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e5_dice0.6510.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 6/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.82it/s, avg_loss=0.2646]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 6] Val Dice=0.6584  mIoU=0.5109
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e6_dice0.6584.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 7/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.81it/s, avg_loss=0.2604]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 7] Val Dice=0.6577  mIoU=0.5096


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 8/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.83it/s, avg_loss=0.2567]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 8] Val Dice=0.6619  mIoU=0.5151
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e8_dice0.6619.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 9/15: 100%|██████████| 2932/2932 [05:31<00:00,  8.84it/s, avg_loss=0.2535]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 9] Val Dice=0.6629  mIoU=0.5153
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e9_dice0.6629.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 10/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.82it/s, avg_loss=0.2510]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 10] Val Dice=0.6650  mIoU=0.5177
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e10_dice0.6650.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 11/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.82it/s, avg_loss=0.2490]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 11] Val Dice=0.6668  mIoU=0.5199
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e11_dice0.6668.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 12/15: 100%|██████████| 2932/2932 [05:32<00:00,  8.83it/s, avg_loss=0.2476]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 13/15: 100%|██████████| 2932/2932 [05:31<00:00,  8.84it/s, avg_loss=0.2463]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 13] Val Dice=0.6676  mIoU=0.5209
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e13_dice0.6676.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 14/15: 100%|██████████| 2932/2932 [05:31<00:00,  8.85it/s, avg_loss=0.2464]
  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                        

[Epoch 14] Val Dice=0.6679  mIoU=0.5212
Saved: /kaggle/working/ckpts_clipseg/clipseg_best_e14_dice0.6679.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
Epoch 15/15:  42%|████▏     | 1221/2932 [02:19<03:14,  8.79it/s, avg_loss=0.2480]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=10000.0 (msgs/sec)
NotebookApp.rate_limit_window=1.0 (secs)



In [7]:
# =========================
# Test loop (same as val)
# =========================
# Assumes you already ran the training cell so the following exist:
# - DATA_ROOT, SAVE_DIR, device, IMG_SIZE, BATCH, WORKERS
# - processor, model
# - CRACK_PROMPTS_VAL, TAPING_PROMPTS_VAL
# - PromptedSegDataset, ConcatDataset, DataLoader
# - dice_coeff, miou
# - F from torch.nn.functional
#
# If you want to be sure you evaluate the best checkpoint, we (re)load it here.
from pathlib import Path
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import ConcatDataset, DataLoader
from tqdm import tqdm

# ---- (Optional) reload best weights for testing ----
try:
    # If best_path exists from training, prefer it; else fall back to final
    ckpt_to_eval = best_path if ('best_path' in globals() and best_path is not None) else (SAVE_DIR / "clipseg_final.pt")
    if Path(ckpt_to_eval).exists():
        sd = torch.load(ckpt_to_eval, map_location="cpu")
        sd = sd.get("model", sd)
        model.load_state_dict(sd, strict=False)
        model.to(device).eval()
        print("Loaded checkpoint for TEST:", ckpt_to_eval)
    else:
        print("No saved checkpoint found; using in-memory model weights.")
except Exception as e:
    print("Checkpoint reload skipped due to error:", e)

# ---- Build TEST datasets exactly like VAL (no aug, fixed prompts) ----
ds_te_cr = PromptedSegDataset(str(DATA_ROOT/"cracks"/"test"),  CRACK_PROMPTS_VAL,  IMG_SIZE, True,  False, processor)
ds_te_tp = PromptedSegDataset(str(DATA_ROOT/"taping"/"test"), TAPING_PROMPTS_VAL, IMG_SIZE, False, False, processor)
ds_te = ConcatDataset([ds_te_cr, ds_te_tp])
dl_te = DataLoader(ds_te, batch_size=BATCH, shuffle=False, num_workers=WORKERS, pin_memory=True)

# ---- Evaluate (identical to your VAL loop) ----
model.eval(); dices=[]; ious=[]
with torch.no_grad():
    for b in tqdm(dl_te, leave=False, desc="Test"):
        px   = b["pixel_values"].to(device, non_blocking=True)
        ids  = b["input_ids"].to(device, non_blocking=True)
        attn = b["attention_mask"].to(device, non_blocking=True)
        gt   = b["mask"].to(device, non_blocking=True)

        out    = model(pixel_values=px, input_ids=ids, attention_mask=attn)
        logits = out.logits
        gt_rs  = F.interpolate(gt.unsqueeze(1), size=logits.shape[-2:], mode="nearest").squeeze(1)
        prob   = torch.sigmoid(logits)

        dices.append(dice_coeff(prob, gt_rs))
        ious.append(miou(prob, gt_rs))

d_test = float(np.mean(dices)) if len(dices) else float("nan")
m_test = float(np.mean(ious))  if len(ious)  else float("nan")
print(f"[TEST] Dice={d_test:.4f}  mIoU={m_test:.4f}  (N={len(dices)})")


Loaded checkpoint for TEST: /kaggle/working/ckpts_clipseg/clipseg_best_e15_dice0.6681.pt


  return self.preprocess(images, **kwargs)
  return self.preprocess(images, **kwargs)
                                                     

[TEST] Dice=0.7106  mIoU=0.5625  (N=28)


