In [2]:
!pip install sam2

Collecting sam2
  Downloading sam2-1.1.0.tar.gz (152 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.8/152.8 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting hydra-core>=1.3.2 (from sam2)
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)
Collecting iopath>=0.1.10 (from sam2)
  Downloading iopath-0.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting portalocker (from iopath>=0.1.10->sam2)
  Downloading portalocker-3.2.0-py3-none-any.whl.metadata (8.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.5.1->sam2)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (

In [3]:
!wget -O /kaggle/working/sam2.1_hiera_tiny.pt \
  https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_tiny.pt

--2025-10-06 11:01:31--  https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_tiny.pt
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 99.84.118.60, 99.84.118.67, 99.84.118.117, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|99.84.118.60|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 156008466 (149M) [application/vnd.snesdev-page-table]
Saving to: ‘/kaggle/working/sam2.1_hiera_tiny.pt’


2025-10-06 11:01:32 (318 MB/s) - ‘/kaggle/working/sam2.1_hiera_tiny.pt’ saved [156008466/156008466]



____
## Fine-tuning SAM 2.1 (Point-Prompted, Initial Loss)

In [4]:
# Fine-tune SAM 2.1 (image) on /kaggle/input/taping-cracks (cracks + taping)
# Saves to /kaggle/working/ckpts_sam2
# No data augmentation (only resize). Trains prompt encoder + mask decoder.
# Inference helper at the end auto-prompts by scanning a small point grid.

import os, glob, random, json, math
from pathlib import Path
import numpy as np
import cv2
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset

# --- SAM 2.1 imports (install from the official repo beforehand) ---
# !pip -q install -U "git+https://github.com/facebookresearch/sam2.git"
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# =========================
# CONFIG
# =========================
BASE_CANDIDATES = [
    "/kaggle/input/taping-cracks",
    "/kaggle/input/taping-cracks/data copy",
    "/kaggle/input/taping-cracks/data_copy",
]
SAVE_DIR   = Path("/kaggle/working/ckpts_sam2")
IMG_SIZE   = 1024     # SAM2 default training size used in refs
BATCH_STEPS= 1        # we optimize per-sample; use grad-accum to simulate batch
EPOCHS     = 10
LR         = 5e-5
SEED       = 42
WORKERS    = 2
WEIGHT_DECAY = 1e-4
ACCUM_STEPS  = 8
STEP_LR_SIZE  = 2000
STEP_LR_GAMMA = 0.6

# SAM2.1 config + checkpoint (match pair!)
# IMPORTANT: use a Hydra config NAME (no .yaml, no absolute path).
MODEL_CFG = "configs/sam2.1/sam2.1_hiera_t"
CKPT_PATH = "/kaggle/working/sam2.1_hiera_tiny.pt"  # put your downloaded ckpt here

# =========================
# Utils
# =========================
def set_seed(s=SEED):
    random.seed(s); np.random.seed(s)
    torch.manual_seed(s); torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.benchmark=False; torch.backends.cudnn.deterministic=True

def find_data_root(candidates):
    for base in candidates:
        root = Path(base)
        if (root/"cracks").exists() and (root/"taping").exists():
            return root
        if root.exists():
            for sub in root.iterdir():
                if sub.is_dir() and (sub/"cracks").exists() and (sub/"taping").exists():
                    return sub
    raise FileNotFoundError("Dataset not found in: " + ", ".join(candidates))

def load_pair(img_path, mask_path):
    img = cv2.cvtColor(cv2.imread(img_path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    if mask_path and os.path.exists(mask_path):
        m = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    else:
        m = np.zeros(img.shape[:2], np.uint8)
    # hard resize only (no aug)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR)
    m   = cv2.resize(m,   (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_NEAREST)
    m   = (m > 127).astype(np.uint8)
    return img, m

def sample_points_from_mask(bin_mask, max_points=8):
    """Return K positive points (y, x) uniformly from foreground pixels; empty if none."""
    ys, xs = np.where(bin_mask > 0)
    if len(ys) == 0:
        return np.zeros((0,2), np.int32)
    idx = np.random.choice(len(ys), size=min(max_points, len(ys)), replace=False)
    pts = np.stack([ys[idx], xs[idx]], axis=1)  # (k,2) in (y,x)
    return pts

def _ensure_batched(coords, labels):
    """
    Ensure shapes compatible with sam_prompt_encoder:
      coords: (B,K,2), labels: (B,K).
    Accepts coords (K,2), labels (K,) and adds batch dim; also squeezes (B,K,1)->(B,K).
    """
    if coords is None or labels is None:
        return coords, labels
    if hasattr(coords, "ndim") and coords.ndim == 2:
        coords = coords[None, ...]
    if hasattr(labels, "ndim"):
        if labels.ndim == 1:
            labels = labels[None, ...]
        elif labels.ndim == 3 and labels.shape[-1] == 1:
            labels = labels.squeeze(-1)
    return coords, labels

# =========================
# Dataset
# =========================
class ImgMaskDataset(Dataset):
    """split_dir: .../cracks/train or .../taping/val; no augmentation"""
    def __init__(self, split_dir):
        self.root = Path(split_dir)
        self.img_dir = self.root/"images"
        self.mask_dir= self.root/"masks"
        self.items=[]
        for ip in sorted(glob.glob(str(self.img_dir/"*"))):
            base = Path(ip).stem
            mp=None
            for ext in (".png",".jpg",".jpeg"):
                cand = self.mask_dir/f"{base}{ext}"
                if cand.exists(): mp=str(cand); break
            self.items.append((ip, mp))
        if not self.items:
            raise RuntimeError(f"No images under {self.img_dir}")
    def __len__(self): return len(self.items)
    def __getitem__(self, i):
        ip, mp = self.items[i]
        img, mask = load_pair(ip, mp)
        # points sampled from GT mask (SAM2 expects prompts)
        pts = sample_points_from_mask(mask, max_points=8)   # (k,2) in (y,x)
        # Pack numpy -> predictor expects: image HWC uint8, mask uint8
        return {"image": img, "mask": mask.astype(np.float32), "points_yx": pts, "path": ip}

def build_concat(ds_root: Path, split: str):
    # crack + taping for given split
    parts = []
    for sub in ["cracks", "taping"]:
        p = ds_root/sub/split
        if p.exists():
            parts.append(ImgMaskDataset(str(p)))
    if not parts:
        raise RuntimeError(f"Missing split={split}")
    return ConcatDataset(parts)

# =========================
# Model / Trainer
# =========================
def bce_loss_from_prob(prob, target):
    # prob & target: (N, H, W) with prob in [0,1]
    eps=1e-6
    return (-target*torch.log(prob+eps) - (1-target)*torch.log(1-prob+eps)).mean()

def compute_iou(bin_pred, bin_gt, eps=1e-6):
    inter = (bin_pred & bin_gt).sum(dim=(1,2)).float()
    union = bin_pred.sum(dim=(1,2)) + bin_gt.sum(dim=(1,2)) - inter
    return (inter+eps)/(union+eps)

def train_one_step(predictor, batch, optimizer, scaler, step, device):
    """
    Recipe:
    - set_image(image)
    - _prep_prompts (points as foreground=1)
    - prompt_encoder + mask_decoder
    - upsample -> sigmoid -> BCE
    - score loss aligns predicted IoU with GT IoU
    """
    img = batch["image"]                 # HWC uint8
    gt  = batch["mask"]                  # HW float32 (0/1)
    pts = batch["points_yx"]             # (K,2) or (0,2)

    if pts.shape[0] == 0:
        return None  # skip unlabeled images

    # IMPORTANT: labels should be 1-D (K,), not (K,1)
    input_point = np.stack([pts[:,1], pts[:,0]], axis=1).astype(np.float32)  # (K,2) -> (x,y)
    input_label = np.ones((input_point.shape[0],), dtype=np.float32)         # (K,)

    with torch.amp.autocast(device_type='cuda', enabled=(device.type=='cuda')):
        predictor.set_image(img)  # encodes the image & caches features

        # Prepare prompts
        mask_input, unnorm_coords, labels, unnorm_box = predictor._prep_prompts(
            input_point, input_label, box=None, mask_logits=None, normalize_coords=True
        )
        # Force shapes to (B,K,2) and (B,K)
        unnorm_coords, labels = _ensure_batched(unnorm_coords, labels)
        if unnorm_coords is None or labels is None or unnorm_coords.shape[1] == 0:
            return None

        # Encode prompts, decode masks
        sparse_embeddings, dense_embeddings = predictor.model.sam_prompt_encoder(
            points=(unnorm_coords, labels), boxes=None, masks=None
        )
        batched_mode = unnorm_coords.shape[0] > 1
        high_res_features = [feat_level[-1].unsqueeze(0) for feat_level in predictor._features["high_res_feats"]]

        low_res_masks, prd_scores, _, _ = predictor.model.sam_mask_decoder(
            image_embeddings = predictor._features["image_embed"][-1].unsqueeze(0),
            image_pe         = predictor.model.sam_prompt_encoder.get_dense_pe(),
            sparse_prompt_embeddings = sparse_embeddings,
            dense_prompt_embeddings  = dense_embeddings,
            multimask_output = True,
            repeat_image     = batched_mode,
            high_res_features= high_res_features,
        )
        # Upsample to original HxW then sigmoid -> prob
        prd_masks = predictor._transforms.postprocess_masks(low_res_masks, predictor._orig_hw[-1])  # logits
        prob = torch.sigmoid(prd_masks[:, 0])  # (N=1, H, W)

        gt_t  = torch.from_numpy(gt).to(prob.device)[None, ...]  # (1,H,W)

        seg_loss = bce_loss_from_prob(prob, gt_t)

        # IoU supervision on the score head
        bin_pred = (prob > 0.5)
        bin_gt   = (gt_t > 0.5)
        iou = compute_iou(bin_pred, bin_gt)  # (1,)
        score_loss = torch.abs(prd_scores[:, 0] - iou).mean()

        loss = seg_loss + 0.05*score_loss
        loss = loss / ACCUM_STEPS

    scaler.scale(loss).backward()
    torch.nn.utils.clip_grad_norm_(predictor.model.parameters(), max_norm=1.0)

    if step % ACCUM_STEPS == 0:
        scaler.step(optimizer)
        scaler.update()
        predictor.model.zero_grad()

    return {
        "loss": float(loss.detach().cpu()) * ACCUM_STEPS,
        "seg_loss": float(seg_loss.detach().cpu()),
        "iou": float(iou.detach().cpu().mean())
    }

# =========================
# Run
# =========================
set_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

DATA_ROOT = find_data_root(BASE_CANDIDATES)
print("Using DATA_ROOT:", DATA_ROOT)

train_set = build_concat(DATA_ROOT, "train")
val_set   = build_concat(DATA_ROOT, "val")

# Per-sample iteration (SAM2 predictor caches one image at a time cleanly)
dl_tr = DataLoader(train_set, batch_size=1, shuffle=True,  num_workers=WORKERS, pin_memory=True)
dl_va = DataLoader(val_set,   batch_size=1, shuffle=False, num_workers=WORKERS, pin_memory=True)

# Build SAM2.1 model + predictor
assert os.path.exists(CKPT_PATH), f"Checkpoint not found: {CKPT_PATH}"
# IMPORTANT: MODEL_CFG must be a Hydra config NAME (e.g., "configs/sam2.1/sam2.1_hiera_t")
sam2_model = build_sam2(MODEL_CFG, CKPT_PATH, device=device.type)
predictor = SAM2ImagePredictor(sam2_model)

# Freeze image encoder; train prompt encoder + mask decoder
if hasattr(predictor.model, "image_encoder"):
    for p in predictor.model.image_encoder.parameters(): p.requires_grad = False
for p in predictor.model.sam_prompt_encoder.parameters(): p.requires_grad = True
for p in predictor.model.sam_mask_decoder.parameters():  p.requires_grad = True
predictor.model.sam_prompt_encoder.train(True)
predictor.model.sam_mask_decoder.train(True)

# Optim & sched
trainable_params = [p for p in predictor.model.parameters() if p.requires_grad]
opt = torch.optim.AdamW(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY)
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=STEP_LR_SIZE, gamma=STEP_LR_GAMMA)

use_amp = (device.type == "cuda")
scaler = torch.amp.GradScaler(enabled=use_amp)

best_iou, best_path = -1.0, None
global_step = 0

for epoch in range(1, EPOCHS+1):
    predictor.model.train(True)
    running, running_iou = 0.0, 0.0
    pbar = tqdm(dl_tr, desc=f"Epoch {epoch}/{EPOCHS}")

    for batch in pbar:
        # unpack (we purposely keep batch_size=1)
        feed = {
            "image":      batch["image"][0].numpy(),
            "mask":       batch["mask"][0].numpy(),
            "points_yx":  batch["points_yx"][0].numpy(),  # (K,2)
            "path":       batch["path"][0],
        }
        metrics = train_one_step(predictor, feed, opt, scaler, global_step+1, device)
        global_step += 1
        if metrics is None: 
            continue
        running += metrics["loss"]
        running_iou += metrics["iou"]
        if global_step % ACCUM_STEPS == 0:
            sched.step()
        pbar.set_postfix(loss=f"{running/(global_step or 1):.4f}",
                         iou=f"{running_iou/(global_step or 1):.3f}",
                         lr=f"{opt.param_groups[0]['lr']:.2e}")

    # -------- Validation (quick IoU) --------
    predictor.model.eval()
    with torch.no_grad(), torch.amp.autocast(device_type='cuda', enabled=use_amp):
        ious=[]
        for b in tqdm(dl_va, leave=False, desc="Valid"):
            img = b["image"][0].numpy()
            gt  = b["mask"][0].numpy()
            pts = b["points_yx"][0].numpy()
            if pts.shape[0]==0: 
                continue
            input_point = np.stack([pts[:,1], pts[:,0]], axis=1).astype(np.float32)  # (K,2)
            input_label = np.ones((input_point.shape[0],), dtype=np.float32)         # (K,)

            predictor.set_image(img)
            mask_input, unnorm_coords, labels, unnorm_box = predictor._prep_prompts(
                input_point, input_label, box=None, mask_logits=None, normalize_coords=True
            )
            unnorm_coords, labels = _ensure_batched(unnorm_coords, labels)
            if unnorm_coords is None or labels is None or unnorm_coords.shape[1]==0:
                continue

            sparse_embeddings, dense_embeddings = predictor.model.sam_prompt_encoder(
                points=(unnorm_coords, labels), boxes=None, masks=None
            )
            batched_mode = unnorm_coords.shape[0] > 1
            high_res_features = [feat_level[-1].unsqueeze(0) for feat_level in predictor._features["high_res_feats"]]
            low_res_masks, prd_scores, _, _ = predictor.model.sam_mask_decoder(
                image_embeddings=predictor._features["image_embed"][-1].unsqueeze(0),
                image_pe=predictor.model.sam_prompt_encoder.get_dense_pe(),
                sparse_prompt_embeddings=sparse_embeddings,
                dense_prompt_embeddings=dense_embeddings,
                multimask_output=True,
                repeat_image=batched_mode,
                high_res_features=high_res_features,
            )
            prd_masks = predictor._transforms.postprocess_masks(low_res_masks, predictor._orig_hw[-1])
            prob = torch.sigmoid(prd_masks[:, 0])
            iou = compute_iou((prob>0.5), torch.from_numpy(gt).to(prob.device)[None,...]>0.5)
            ious.append(float(iou.mean().detach().cpu()))
    mean_iou = float(np.mean(ious)) if ious else 0.0
    print(f"[Epoch {epoch}] Val mean IoU={mean_iou:.4f}")

    # Save best
    if mean_iou > best_iou:
        best_iou = mean_iou
        best_path = SAVE_DIR/f"sam2.1_best_e{epoch}_miou{mean_iou:.4f}.pt"
        torch.save(predictor.model.state_dict(), best_path)
        print("Saved:", best_path)

# Final save
final_path = SAVE_DIR/"sam2.1_final.pt"
torch.save(predictor.model.state_dict(), final_path)
print("Final:", final_path, "| Best:", best_path)

# --- Inference helper: auto-prompt via sparse point grid, take best mask by score ---
with open(SAVE_DIR/"inference_helper_sam2.py", "w") as f:
    f.write(f"""import os, math, numpy as np, torch, cv2
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_CFG = r"configs/sam2.1/sam2.1_hiera_t"   # Hydra config NAME
CKPT      = r"{CKPT_PATH}"
sam2 = build_sam2(MODEL_CFG, CKPT, device=device.type)
predictor = SAM2ImagePredictor(sam2)

# Load your finetuned weights
FT = r"{str(best_path if best_path else final_path)}"
sd = torch.load(FT, map_location="cpu")
predictor.model.load_state_dict(sd, strict=False)
predictor.model.eval()

def _grid_points(h, w, n=9):
    # n points over the image (rough grid)
    r = int(math.sqrt(n))
    ys = np.linspace(h*0.15, h*0.85, r).astype(np.int32)
    xs = np.linspace(w*0.15, w*0.85, r).astype(np.int32)
    pts = np.array([(y,x) for y in ys for x in xs], dtype=np.int32)
    return pts

def _ensure_batched(coords, labels):
    if coords is None or labels is None:
        return coords, labels
    if hasattr(coords, "ndim") and coords.ndim == 2:
        coords = coords[None, ...]
    if hasattr(labels, "ndim"):
        if labels.ndim == 1:
            labels = labels[None, ...]
        elif labels.ndim == 3 and labels.shape[-1] == 1:
            labels = labels.squeeze(-1)
    return coords, labels

def predict_mask(img_path, out_png, thr=0.5, n_points=9):
    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    ih, iw = img.shape[:2]
    img_r = cv2.resize(img, ({IMG_SIZE}, {IMG_SIZE}), interpolation=cv2.INTER_LINEAR)

    predictor.set_image(img_r)
    pts = _grid_points({IMG_SIZE}, {IMG_SIZE}, n=n_points)
    ipt = np.stack([pts[:,1], pts[:,0]], axis=1).astype(np.float32)  # (K,2)
    ilb = np.ones((ipt.shape[0],), dtype=np.float32)                 # (K,)

    # Prep prompts and run decoder
    mask_input, unnorm_coords, labels, unnorm_box = predictor._prep_prompts(
        ipt, ilb, box=None, mask_logits=None, normalize_coords=True
    )
    unnorm_coords, labels = _ensure_batched(unnorm_coords, labels)

    sparse_embeddings, dense_embeddings = predictor.model.sam_prompt_encoder(
        points=(unnorm_coords, labels), boxes=None, masks=None
    )
    batched_mode = unnorm_coords.shape[0] > 1
    high_res_features = [feat_level[-1].unsqueeze(0) for feat_level in predictor._features["high_res_feats"]]
    low_res_masks, prd_scores, _, _ = predictor.model.sam_mask_decoder(
        image_embeddings=predictor._features["image_embed"][-1].unsqueeze(0),
        image_pe=predictor.model.sam_prompt_encoder.get_dense_pe(),
        sparse_prompt_embeddings=sparse_embeddings,
        dense_prompt_embeddings=dense_embeddings,
        multimask_output=True,
        repeat_image=batched_mode,
        high_res_features=high_res_features,
    )
    up = predictor._transforms.postprocess_masks(low_res_masks, predictor._orig_hw[-1])  # logits
    prob = torch.sigmoid(up[:,0]).detach().cpu().numpy()  # [num_prompts, H, W]
    sc   = prd_scores[:,0].detach().cpu().numpy()         # [num_prompts]
    best = prob[sc.argmax()]                               # (H,W) in resized space
    best = cv2.resize((best>thr).astype("uint8")*255, (iw, ih), interpolation=cv2.INTER_NEAREST)
    cv2.imwrite(out_png, best)
    return out_png
# Example:
# predict_mask("/kaggle/input/taping-cracks/taping/val/images/ANY.jpg", "/kaggle/working/ANY_sam2.png")
""")
print("Helper written:", SAVE_DIR/"inference_helper_sam2.py")


ModuleNotFoundError: No module named 'sam2'

In [1]:
# ================================
# Test-set evaluation for your fine-tuned SAM 2.1 model
# Metrics: mean IoU (mIoU) and mean Dice
# - Matches your train/val recipe: resize to IMG_SIZE and use GT-positive points as prompts
# - Averages over multiple random GT-point samplings per image to reduce variance
# - Skips images with empty GT masks (reports count)
# - Saves a per-image CSV of IoU/Dice to /kaggle/working/test_metrics.csv
# ================================

import os, glob, random, math, csv
from pathlib import Path
import numpy as np
import cv2
from tqdm import tqdm

import torch
from torch.utils.data import Dataset, DataLoader, ConcatDataset

# --- SAM 2.1 imports (assumes installed) ---
# !pip -q install -U "git+https://github.com/facebookresearch/sam2.git"
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# -------------------------
# CONFIG (align with training)
# -------------------------
BASE_CANDIDATES = [
    "/kaggle/input/taping-cracks",
    "/kaggle/input/taping-cracks/data copy",
    "/kaggle/input/taping-cracks/data_copy",
]
IMG_SIZE   = 1024
WORKERS    = 2
SEED       = 42

# Base SAM2.1 (same pair you used to fine-tune)
MODEL_CFG  = "configs/sam2.1/sam2.1_hiera_t"
CKPT_PATH  = "/kaggle/working/sam2.1_hiera_tiny.pt"  # must exist

# >>> Your fine-tuned weights (given path) <<<
FT_PATH = "/kaggle/input/seg_crack_taping_.62iou/pytorch/default/1/sam2.1_best_e6_miou0.6201.pt"

# Evaluation behavior
THRESH                  = 0.5    # binarization of predicted prob
PROMPTS_PER_IMAGE       = 3      # average N random GT samplings per image
MAX_POINTS_PER_PROMPT   = 8      # up to K positive points per sampling
SAVE_PER_IMAGE_CSV      = True
CSV_PATH                = "/kaggle/working/test_metrics.csv"

# -------------------------
# Utils
# -------------------------
def set_seed(s=SEED):
    random.seed(s); np.random.seed(s)
    torch.manual_seed(s); torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.benchmark=False; torch.backends.cudnn.deterministic=True

def find_data_root(candidates):
    for base in candidates:
        root = Path(base)
        if (root/"cracks").exists() and (root/"taping").exists():
            return root
        if root.exists():
            for sub in root.iterdir():
                if sub.is_dir() and (sub/"cracks").exists() and (sub/"taping").exists():
                    return sub
    raise FileNotFoundError("Dataset not found in: " + ", ".join(candidates))

def load_pair(img_path, mask_path):
    img = cv2.cvtColor(cv2.imread(img_path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    if mask_path and os.path.exists(mask_path):
        m = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    else:
        m = np.zeros(img.shape[:2], np.uint8)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR)
    m   = cv2.resize(m,   (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_NEAREST)
    m   = (m > 127).astype(np.uint8)
    return img, m

def sample_points_from_mask(bin_mask, max_points=8):
    ys, xs = np.where(bin_mask > 0)
    if len(ys) == 0:
        return np.zeros((0,2), np.int32)
    k = min(max_points, len(ys))
    idx = np.random.choice(len(ys), size=k, replace=False)
    return np.stack([ys[idx], xs[idx]], axis=1).astype(np.int32)  # (k,2) in (y,x)

def _ensure_batched(coords, labels):
    if coords is None or labels is None:
        return coords, labels
    if hasattr(coords, "ndim") and coords.ndim == 2:
        coords = coords[None, ...]
    if hasattr(labels, "ndim"):
        if labels.ndim == 1:
            labels = labels[None, ...]
        elif labels.ndim == 3 and labels.shape[-1] == 1:
            labels = labels.squeeze(-1)
    return coords, labels

def compute_iou_torch(bin_pred, bin_gt, eps=1e-6):
    inter = (bin_pred & bin_gt).sum(dim=(1,2)).float()
    union = bin_pred.sum(dim=(1,2)).float() + bin_gt.sum(dim=(1,2)).float() - inter
    return (inter + eps) / (union + eps)

def compute_dice_torch(bin_pred, bin_gt, eps=1e-6):
    inter = (bin_pred & bin_gt).sum(dim=(1,2)).float()
    card  = bin_pred.sum(dim=(1,2)).float() + bin_gt.sum(dim=(1,2)).float()
    return (2.0 * inter + eps) / (card + eps)

# -------------------------
# Dataset
# -------------------------
class ImgMaskDataset(Dataset):
    """split_dir: .../cracks/test or .../taping/test; no augmentation"""
    def __init__(self, split_dir):
        self.root = Path(split_dir)
        self.img_dir = self.root/"images"
        self.mask_dir= self.root/"masks"
        self.items=[]
        for ip in sorted(glob.glob(str(self.img_dir/"*"))):
            base = Path(ip).stem
            mp=None
            for ext in (".png",".jpg",".jpeg"):
                cand = self.mask_dir/f"{base}{ext}"
                if cand.exists(): mp=str(cand); break
            self.items.append((ip, mp))
        if not self.items:
            raise RuntimeError(f"No images under {self.img_dir}")
    def __len__(self): return len(self.items)
    def __getitem__(self, i):
        ip, mp = self.items[i]
        img, mask = load_pair(ip, mp)
        return {"image": img, "mask": mask.astype(np.float32), "path": ip}

def build_concat(ds_root: Path, split: str):
    parts = []
    for sub in ["cracks", "taping"]:
        p = ds_root/sub/split
        if p.exists():
            parts.append(ImgMaskDataset(str(p)))
    if not parts:
        raise RuntimeError(f"Missing split={split}")
    return ConcatDataset(parts)

# -------------------------
# Load model + data
# -------------------------
set_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

assert os.path.exists(CKPT_PATH), f"Base checkpoint not found: {CKPT_PATH}"
assert os.path.exists(FT_PATH),   f"Finetuned weights not found: {FT_PATH}"

sam2_model = build_sam2(MODEL_CFG, CKPT_PATH, device=device.type)
predictor  = SAM2ImagePredictor(sam2_model)

# Load finetuned weights
sd = torch.load(FT_PATH, map_location="cpu")
missing, unexpected = predictor.model.load_state_dict(sd, strict=False)
if missing or unexpected:
    print(f"[load_state_dict] missing={len(missing)} unexpected={len(unexpected)}")

predictor.model.eval()

DATA_ROOT = find_data_root(BASE_CANDIDATES)
print("Using DATA_ROOT:", DATA_ROOT)

# Prefer explicit 'test' split; fallback to 'val' if not present
try:
    test_set = build_concat(DATA_ROOT, "test")
except RuntimeError:
    print("[WARN] 'test' split not found -> using 'val' for evaluation.")
    test_set = build_concat(DATA_ROOT, "val")

dl_te = DataLoader(test_set, batch_size=1, shuffle=False,
                   num_workers=WORKERS, pin_memory=True)

# -------------------------
# Evaluation
# -------------------------
use_amp = (device.type == "cuda")
all_iou, all_dice = [], []
per_image_rows = []
skipped_empty = 0

with torch.no_grad():
    for b in tqdm(dl_te, desc="Evaluate(test)"):
        img = b["image"][0].numpy()           # HWC uint8 (resized)
        gt  = b["mask"][0].numpy().astype(np.uint8)  # (H,W) {0,1}
        path= b["path"][0]

        # Skip empty GT (no positive; same policy as your val loop)
        if gt.sum() == 0:
            skipped_empty += 1
            continue

        # Cache features once per image
        with torch.amp.autocast(device_type='cuda', enabled=use_amp):
            predictor.set_image(img)

        iou_trials, dice_trials = [], []
        for _ in range(PROMPTS_PER_IMAGE):
            pts = sample_points_from_mask(gt, max_points=MAX_POINTS_PER_PROMPT)  # (k,2) in (y,x)
            if pts.shape[0] == 0:
                break

            input_point = np.stack([pts[:,1], pts[:,0]], axis=1).astype(np.float32)  # (K,2) -> (x,y)
            input_label = np.ones((input_point.shape[0],), dtype=np.float32)         # (K,)

            with torch.amp.autocast(device_type='cuda', enabled=use_amp):
                mask_input, unnorm_coords, labels, unnorm_box = predictor._prep_prompts(
                    input_point, input_label, box=None, mask_logits=None, normalize_coords=True
                )
                unnorm_coords, labels = _ensure_batched(unnorm_coords, labels)

                sparse_embeddings, dense_embeddings = predictor.model.sam_prompt_encoder(
                    points=(unnorm_coords, labels), boxes=None, masks=None
                )
                batched_mode = unnorm_coords.shape[0] > 1
                high_res_features = [feat_level[-1].unsqueeze(0) for feat_level in predictor._features["high_res_feats"]]
                low_res_masks, prd_scores, _, _ = predictor.model.sam_mask_decoder(
                    image_embeddings=predictor._features["image_embed"][-1].unsqueeze(0),
                    image_pe=predictor.model.sam_prompt_encoder.get_dense_pe(),
                    sparse_prompt_embeddings=sparse_embeddings,
                    dense_prompt_embeddings=dense_embeddings,
                    multimask_output=True,
                    repeat_image=batched_mode,
                    high_res_features=high_res_features,
                )
                up   = predictor._transforms.postprocess_masks(low_res_masks, predictor._orig_hw[-1])  # logits
                prob = torch.sigmoid(up[:,0])  # (N=1,H,W) on device

            # Threshold + metrics (single mask)
            pred_bin = (prob > THRESH)
            gt_t     = torch.from_numpy(gt).to(prob.device)[None, ...].bool()

            iou  = compute_iou_torch(pred_bin, gt_t).mean().item()
            dice = compute_dice_torch(pred_bin, gt_t).mean().item()

            iou_trials.append(iou)
            dice_trials.append(dice)

        if len(iou_trials) == 0:
            skipped_empty += 1
            continue

        iou_mean  = float(np.mean(iou_trials))
        dice_mean = float(np.mean(dice_trials))

        all_iou.append(iou_mean)
        all_dice.append(dice_mean)
        per_image_rows.append((path, iou_mean, dice_mean))

# -------------------------
# Report + optional CSV
# -------------------------
num_used = len(all_iou)
print(f"\nImages used: {num_used} | Skipped (empty GT or no prompts): {skipped_empty}")
if num_used > 0:
    print(f"Test mIoU : {np.mean(all_iou):.4f}")
    print(f"Test Dice : {np.mean(all_dice):.4f}")
else:
    print("No images evaluated.")

if SAVE_PER_IMAGE_CSV and num_used > 0:
    with open(CSV_PATH, "w", newline="") as f:
        w = csv.writer(f)
        w.writerow(["image_path", "iou", "dice"])
        w.writerows(per_image_rows)
    print("Per-image metrics saved to:", CSV_PATH)


Using DATA_ROOT: /kaggle/input/taping-cracks/data copy


Evaluate(test): 100%|██████████| 165/165 [00:20<00:00,  7.94it/s]


Images used: 165 | Skipped (empty GT or no prompts): 0
Test mIoU : 0.6407
Test Dice : 0.7747
Per-image metrics saved to: /kaggle/working/test_metrics.csv





_____
## Fine-tuning SAM 2.1 (Point-Prompted, Improved Loss)

In [1]:
# Fine-tune SAM 2.1 (image) on /kaggle/input/taping-cracks (cracks + taping)
# Saves to /kaggle/working/ckpts_sam2
# No data augmentation (only resize). Trains prompt encoder + mask decoder.
# Computes mIoU & Dice on val after each epoch.

import os, glob, random, math, json
from pathlib import Path
import numpy as np
import cv2
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, ConcatDataset

# --- SAM 2.1 imports (install beforehand) ---
# !pip -q install -U "git+https://github.com/facebookresearch/sam2.git"
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

# =========================
# CONFIG
# =========================
BASE_CANDIDATES = [
    "/kaggle/input/taping-cracks",
    "/kaggle/input/taping-cracks/data copy",
    "/kaggle/input/taping-cracks/data_copy",
]
SAVE_DIR   = Path("/kaggle/working/ckpts_sam2")
IMG_SIZE   = 1024
EPOCHS     = 10
LR         = 5e-5
SEED       = 42
WORKERS    = 2
WEIGHT_DECAY = 1e-4
CLIP_NORM    = 1.0

# SAM2.1 config + checkpoint (match pair!)
MODEL_CFG = "configs/sam2.1/sam2.1_hiera_t"     # Hydra config NAME
CKPT_PATH = "/kaggle/working/sam2.1_hiera_tiny.pt"  # download & place here

# =========================
# Utils
# =========================
def set_seed(s=SEED):
    random.seed(s); np.random.seed(s)
    torch.manual_seed(s); torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.benchmark=False; torch.backends.cudnn.deterministic=True

def find_data_root(candidates):
    for base in candidates:
        root = Path(base)
        if (root/"cracks").exists() and (root/"taping").exists():
            return root
        if root.exists():
            for sub in root.iterdir():
                if sub.is_dir() and (sub/"cracks").exists() and (sub/"taping").exists():
                    return sub
    raise FileNotFoundError("Dataset not found in: " + ", ".join(candidates))

def load_pair(img_path, mask_path):
    img = cv2.cvtColor(cv2.imread(img_path, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
    if mask_path and os.path.exists(mask_path):
        m = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    else:
        m = np.zeros(img.shape[:2], np.uint8)
    # hard resize only (no aug)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_LINEAR)
    m   = cv2.resize(m,   (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_NEAREST)
    m   = (m > 127).astype(np.uint8)
    return img, m

def sample_points_from_mask(bin_mask, max_points=8):
    ys, xs = np.where(bin_mask > 0)
    if len(ys) == 0:
        return np.zeros((0,2), np.int32)
    idx = np.random.choice(len(ys), size=min(max_points, len(ys)), replace=False)
    pts = np.stack([ys[idx], xs[idx]], axis=1)  # (k,2) in (y,x)
    return pts

def _ensure_batched(coords, labels):
    if coords is None or labels is None:
        return coords, labels
    if hasattr(coords, "ndim") and coords.ndim == 2:
        coords = coords[None, ...]
    if hasattr(labels, "ndim"):
        if labels.ndim == 1:
            labels = labels[None, ...]
        elif labels.ndim == 3 and labels.shape[-1] == 1:
            labels = labels.squeeze(-1)
    return coords, labels

def dice_from_prob(prob, target, eps=1e-6):
    # prob, target: (N, H, W) in [0,1]
    inter = (prob * target).sum(dim=(-2, -1))
    union = prob.sum(dim=(-2, -1)) + target.sum(dim=(-2, -1))
    return (2 * inter + eps) / (union + eps)  # (N,)

def miou_from_bin(pred_bin, tgt_bin, eps=1e-6):
    # pred_bin, tgt_bin: bool byte tensors (N,H,W)
    inter = (pred_bin & tgt_bin).sum(dim=(-2, -1)).float()
    union = pred_bin.sum(dim=(-2, -1)) + tgt_bin.sum(dim=(-2, -1)) - inter
    return (inter + eps) / (union + eps)     # (N,)

# =========================
# Dataset
# =========================
class ImgMaskDataset(Dataset):
    """split_dir: .../cracks/train or .../taping/val; no augmentation"""
    def __init__(self, split_dir):
        self.root = Path(split_dir)
        self.img_dir = self.root/"images"
        self.mask_dir= self.root/"masks"
        self.items=[]
        for ip in sorted(glob.glob(str(self.img_dir/"*"))):
            base = Path(ip).stem
            mp=None
            for ext in (".png",".jpg",".jpeg",".bmp",".tif",".jpg"):
                cand = self.mask_dir/f"{base}{ext}"
                if cand.exists(): mp=str(cand); break
            self.items.append((ip, mp))
        if not self.items:
            raise RuntimeError(f"No images under {self.img_dir}")
    def __len__(self): return len(self.items)
    def __getitem__(self, i):
        ip, mp = self.items[i]
        img, mask = load_pair(ip, mp)
        pts = sample_points_from_mask(mask, max_points=8)   # (k,2) in (y,x)
        return {
            "image": img,                                 # HWC uint8
            "mask": mask.astype(np.float32),              # HW float32 in {0,1}
            "points_yx": pts,                             # (K,2)
            "path": ip
        }

def build_concat(ds_root: Path, split: str):
    parts = []
    for sub in ["cracks", "taping"]:
        p = ds_root/sub/split
        if p.exists():
            parts.append(ImgMaskDataset(str(p)))
    if not parts:
        raise RuntimeError(f"Missing split={split}")
    return ConcatDataset(parts)

# =========================
# Train step
# =========================
BCE = torch.nn.BCEWithLogitsLoss()  # autocast-safe

def train_one_step(predictor, batch, optimizer, scaler, device):
    """
    - set_image(image)
    - encode prompts (points)
    - decode low-res masks -> upsample logits
    - loss = BCEWithLogits(logits, gt) + Dice(σ(logits), gt) + tiny score loss
    """
    img = batch["image"]                 # HWC uint8
    gt  = batch["mask"]                  # HW float32 (0/1)
    pts = batch["points_yx"]             # (K,2) or (0,2)

    if pts.shape[0] == 0:
        return None  # skip unlabeled images

    input_point = np.stack([pts[:,1], pts[:,0]], axis=1).astype(np.float32)  # (K,2) x,y
    input_label = np.ones((input_point.shape[0],), dtype=np.float32)         # (K,)

    with torch.autocast(device_type='cuda', enabled=(device.type=='cuda')):
        predictor.set_image(img)

        mask_input, unnorm_coords, labels, _ = predictor._prep_prompts(
            input_point, input_label, box=None, mask_logits=None, normalize_coords=True
        )
        unnorm_coords, labels = _ensure_batched(unnorm_coords, labels)
        if unnorm_coords is None or labels is None or unnorm_coords.shape[1] == 0:
            return None

        sparse_embeddings, dense_embeddings = predictor.model.sam_prompt_encoder(
            points=(unnorm_coords, labels), boxes=None, masks=None
        )
        batched_mode = unnorm_coords.shape[0] > 1
        high_res_features = [feat_level[-1].unsqueeze(0)
                             for feat_level in predictor._features["high_res_feats"]]

        low_res_masks, prd_scores, _, _ = predictor.model.sam_mask_decoder(
            image_embeddings = predictor._features["image_embed"][-1].unsqueeze(0),
            image_pe         = predictor.model.sam_prompt_encoder.get_dense_pe(),
            sparse_prompt_embeddings = sparse_embeddings,
            dense_prompt_embeddings  = dense_embeddings,
            multimask_output = True,   # 3 candidates
            repeat_image     = batched_mode,
            high_res_features= high_res_features,
        )

        # Upsample to original size (logits), then select best-of-3 by score
        up_logits = predictor._transforms.postprocess_masks(low_res_masks, predictor._orig_hw[-1])  # (1,3,H,W)
        best_idx  = prd_scores.argmax(dim=1)                      # (1,)
        logits    = up_logits[torch.arange(up_logits.size(0)), best_idx]  # (1,H,W)

        gt_t   = torch.from_numpy(gt).to(logits.device)[None, ...]       # (1,H,W)

        # --- losses ---
        bce  = BCE(logits, gt_t)                                         # logits!
        prob = torch.sigmoid(logits)                                     # for metrics & dice
        dice = 1.0 - dice_from_prob(prob, gt_t).mean()

        # IoU supervision to score head (tiny term)
        bin_pred = (prob > 0.5)
        bin_gt   = (gt_t > 0.5)
        iou = miou_from_bin(bin_pred, bin_gt).mean()
        score_loss = torch.abs(prd_scores.max(dim=1).values - iou).mean()

        loss = bce + dice + 0.05 * score_loss

    optimizer.zero_grad(set_to_none=True)
    scaler.scale(loss).backward()

    # Clip grads safely in AMP
    if CLIP_NORM:
        scaler.unscale_(optimizer)
        trainable = [p for p in predictor.model.parameters() if p.requires_grad and p.grad is not None]
        if trainable:
            torch.nn.utils.clip_grad_norm_(trainable, CLIP_NORM)

    scaler.step(optimizer)
    scaler.update()

    return {
        "loss": float(loss.detach().cpu()),
        "bce":  float(bce.detach().cpu()),
        "dice": float(1.0 - dice.detach().cpu()),  # report Dice, not DiceLoss
        "miou": float(iou.detach().cpu())
    }

# =========================
# Run
# =========================
set_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAVE_DIR.mkdir(parents=True, exist_ok=True)

DATA_ROOT = find_data_root(BASE_CANDIDATES)
print("Using DATA_ROOT:", DATA_ROOT)

train_set = build_concat(DATA_ROOT, "train")
val_set   = build_concat(DATA_ROOT, "val")

dl_tr = DataLoader(train_set, batch_size=1, shuffle=True,  num_workers=WORKERS, pin_memory=True)
dl_va = DataLoader(val_set,   batch_size=1, shuffle=False, num_workers=WORKERS, pin_memory=True)

# Build SAM2.1 model + predictor
assert os.path.exists(CKPT_PATH), f"Checkpoint not found: {CKPT_PATH}"
sam2_model = build_sam2(MODEL_CFG, CKPT_PATH, device=device.type)
predictor = SAM2ImagePredictor(sam2_model)

# Freeze image encoder; train prompt encoder + mask decoder
if hasattr(predictor.model, "image_encoder"):
    for p in predictor.model.image_encoder.parameters(): p.requires_grad = False
for p in predictor.model.sam_prompt_encoder.parameters(): p.requires_grad = True
for p in predictor.model.sam_mask_decoder.parameters():  p.requires_grad = True
predictor.model.sam_prompt_encoder.train(True)
predictor.model.sam_mask_decoder.train(True)

# Optim
trainable_params = [p for p in predictor.model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(trainable_params, lr=LR, weight_decay=WEIGHT_DECAY)

use_amp = (device.type == "cuda")
scaler = torch.amp.GradScaler(enabled=use_amp)

best_miou, best_path = -1.0, None

for epoch in range(1, EPOCHS+1):
    predictor.model.train(True)
    running = {"loss":0.0, "bce":0.0, "dice":0.0, "miou":0.0}
    pbar = tqdm(dl_tr, desc=f"Epoch {epoch}/{EPOCHS}")

    for batch in pbar:
        feed = {
            "image":      batch["image"][0].numpy(),
            "mask":       batch["mask"][0].numpy(),
            "points_yx":  batch["points_yx"][0].numpy(),
            "path":       batch["path"][0],
        }
        metrics = train_one_step(predictor, feed, optimizer, scaler, device)
        if metrics is None:
            continue
        for k in running: running[k] += metrics.get(k, 0.0)
        steps = max(1, sum(1 for _ in [metrics]))
        pbar.set_postfix(
            loss=f"{running['loss']/steps:.4f}",
            dice=f"{running['dice']/steps:.3f}",
            miou=f"{running['miou']/steps:.3f}",
            lr=f"{optimizer.param_groups[0]['lr']:.2e}"
        )

    # -------- Validation (mIoU & Dice) --------
    predictor.model.eval()
    val_ious, val_dices = [], []
    with torch.no_grad(), torch.autocast(device_type='cuda', enabled=use_amp):
        for b in tqdm(dl_va, leave=False, desc="Valid"):
            img = b["image"][0].numpy()
            gt  = b["mask"][0].numpy()
            pts = b["points_yx"][0].numpy()
            if pts.shape[0]==0: 
                continue
            input_point = np.stack([pts[:,1], pts[:,0]], axis=1).astype(np.float32)
            input_label = np.ones((input_point.shape[0],), dtype=np.float32)

            predictor.set_image(img)
            mask_input, unnorm_coords, labels, _ = predictor._prep_prompts(
                input_point, input_label, box=None, mask_logits=None, normalize_coords=True
            )
            unnorm_coords, labels = _ensure_batched(unnorm_coords, labels)
            if unnorm_coords is None or labels is None or unnorm_coords.shape[1]==0:
                continue

            sparse_embeddings, dense_embeddings = predictor.model.sam_prompt_encoder(
                points=(unnorm_coords, labels), boxes=None, masks=None
            )
            batched_mode = unnorm_coords.shape[0] > 1
            high_res_features = [feat_level[-1].unsqueeze(0) for feat_level in predictor._features["high_res_feats"]]
            low_res_masks, prd_scores, _, _ = predictor.model.sam_mask_decoder(
                image_embeddings=predictor._features["image_embed"][-1].unsqueeze(0),
                image_pe=predictor.model.sam_prompt_encoder.get_dense_pe(),
                sparse_prompt_embeddings=sparse_embeddings,
                dense_prompt_embeddings=dense_embeddings,
                multimask_output=True,
                repeat_image=batched_mode,
                high_res_features=high_res_features,
            )
            up_logits = predictor._transforms.postprocess_masks(low_res_masks, predictor._orig_hw[-1])  # (1,3,H,W)
            best_idx  = prd_scores.argmax(dim=1)
            logits    = up_logits[torch.arange(up_logits.size(0)), best_idx]            # (1,H,W)
            prob      = torch.sigmoid(logits)                                           # (1,H,W)
            gt_t      = torch.from_numpy(gt).to(prob.device)[None, ...]                 # (1,H,W)
            val_ious.append(float(miou_from_bin(prob>0.5, gt_t>0.5).mean().cpu()))
            val_dices.append(float(dice_from_prob(prob, gt_t).mean().cpu()))

    mIoU = float(np.mean(val_ious)) if val_ious else 0.0
    mDice= float(np.mean(val_dices)) if val_dices else 0.0
    print(f"[Epoch {epoch}] Val mIoU={mIoU:.4f}  Dice={mDice:.4f}")

    # Save best
    if mIoU > best_miou:
        best_miou = mIoU
        best_path = SAVE_DIR/f"sam2.1_best_e{epoch}_miou{mIoU:.4f}.pt"
        torch.save(predictor.model.state_dict(), best_path)
        print("Saved:", best_path)

# Final save
final_path = SAVE_DIR/"sam2.1_final.pt"
torch.save(predictor.model.state_dict(), final_path)
print("Final:", final_path, "| Best:", best_path)


Using DATA_ROOT: /kaggle/input/taping-cracks/data copy


Epoch 1/10: 100%|██████████| 17595/17595 [28:16<00:00, 10.37it/s, dice=12212.965, loss=7077.9204, lr=5.00e-05, miou=10087.354]
                                                        

[Epoch 1] Val mIoU=0.6542  Dice=0.7641
Saved: /kaggle/working/ckpts_sam2/sam2.1_best_e1_miou0.6542.pt


Epoch 2/10: 100%|██████████| 17595/17595 [27:54<00:00, 10.51it/s, dice=12762.772, loss=6278.5104, lr=5.00e-05, miou=10686.844]
                                                        

[Epoch 2] Val mIoU=0.6774  Dice=0.7847
Saved: /kaggle/working/ckpts_sam2/sam2.1_best_e2_miou0.6774.pt


Epoch 3/10: 100%|██████████| 17595/17595 [28:09<00:00, 10.42it/s, dice=12994.349, loss=5953.8207, lr=5.00e-05, miou=10951.218]
                                                        

[Epoch 3] Val mIoU=0.6865  Dice=0.7891
Saved: /kaggle/working/ckpts_sam2/sam2.1_best_e3_miou0.6865.pt


                                                        .90it/s, dice=5709.616, loss=2531.5772, lr=5.00e-05, miou=4828.374]

[Epoch 5] Val mIoU=0.6888  Dice=0.7919


Epoch 6/10: 100%|██████████| 17595/17595 [26:10<00:00, 11.20it/s, dice=13340.971, loss=5478.1312, lr=5.00e-05, miou=11364.985]
                                                        

[Epoch 6] Val mIoU=0.6997  Dice=0.8014
Saved: /kaggle/working/ckpts_sam2/sam2.1_best_e6_miou0.6997.pt


Epoch 7/10: 100%|██████████| 17595/17595 [25:53<00:00, 11.33it/s, dice=13445.321, loss=5331.1157, lr=5.00e-05, miou=11490.915]
                                                        

[Epoch 7] Val mIoU=0.7011  Dice=0.8007
Saved: /kaggle/working/ckpts_sam2/sam2.1_best_e7_miou0.7011.pt


Epoch 8/10: 100%|██████████| 17595/17595 [25:57<00:00, 11.29it/s, dice=13522.615, loss=5228.5929, lr=5.00e-05, miou=11585.544]
                                                        

[Epoch 8] Val mIoU=0.7024  Dice=0.8016
Saved: /kaggle/working/ckpts_sam2/sam2.1_best_e8_miou0.7024.pt


Epoch 9/10: 100%|██████████| 17595/17595 [26:13<00:00, 11.18it/s, dice=13595.385, loss=5136.2944, lr=5.00e-05, miou=11676.207]
                                                        

[Epoch 9] Val mIoU=0.7012  Dice=0.8009


Epoch 10/10: 100%|██████████| 17595/17595 [25:48<00:00, 11.36it/s, dice=13662.221, loss=5046.8054, lr=5.00e-05, miou=11759.151]
                                                        

[Epoch 10] Val mIoU=0.7011  Dice=0.7990
Final: /kaggle/working/ckpts_sam2/sam2.1_final.pt | Best: /kaggle/working/ckpts_sam2/sam2.1_best_e8_miou0.7024.pt
