0) Setup (paths, pairing, helpers)

In [None]:
import os, re
from pathlib import Path
import numpy as np
import imageio.v2 as imageio
from glob import glob
from skimage.measure import label as cc_label  # only used if a mask is binary

# ---- Input locations ----
# Ground-truth masks and the predicted masks that are scored against them.
GT_DIR = "/mnt/efs/aimbl_2025/student_data/S-LS/validation_mask_bacteria"
PRED_DIR = "/mnt/efs/aimbl_2025/student_data/S-LS/stardist/ON_stardist_masks_tif"

# Image IDs included in the evaluation.
ALLOWED_IDS = set(range(13, 20))

# File-name patterns; group 1 captures the numeric image ID.
gt_pat = re.compile(r"^mask_(\d+)_bacteria\.tif$")
pred_pat = re.compile(r"^img_(\d+)_bacteria_masks\.tif$")

def _pairs_from_dirs():
    """Collect (gt_path, pred_path) tuples for ground-truth masks with an allowed ID.

    Ground-truth files are taken from GT_DIR in sorted order. For each one
    whose numeric ID is in ALLOWED_IDS, the prediction is expected in
    PRED_DIR under the name ``img_<ID, zero-padded to 3 digits>_bacteria_masks.tif``.

    Raises:
        FileNotFoundError: a selected ground-truth mask has no prediction file.
        AssertionError: no pair was found at all.
    """
    gt_files = sorted(glob(os.path.join(GT_DIR, "mask_*_bacteria.tif")))
    pairs = []
    for gt_file in gt_files:
        gt_name = Path(gt_file).name
        hit = gt_pat.match(gt_name)
        if hit is None:
            continue
        image_id = int(hit.group(1))
        if image_id not in ALLOWED_IDS:
            continue
        pred_name = f"img_{image_id:03d}_bacteria_masks.tif"
        pred_file = os.path.join(PRED_DIR, pred_name)
        if os.path.exists(pred_file):
            pairs.append((gt_file, pred_file))
        else:
            raise FileNotFoundError(f"Missing prediction for GT {gt_name}: expected {pred_name}")
    assert pairs, "No (GT, PRED) pairs found — check paths and filenames."
    return pairs

def _ensure_labeled(arr):
    """
    Ensure an instance-labeled array: 0=background, 1..K=instances.
    If the array is binary (only 0/1), perform connected-components labeling.
    """
    arr = np.asarray(arr)
    uniq = np.unique(arr)
    if set(uniq.tolist()) <= {0,1}:  # binary -> label CCs
        return cc_label(arr.astype(np.uint8), connectivity=1)
    return arr.astype(np.int32)

def _compact_map(labels):
    """Map arbitrary ids to 0..K, keep 0 as background."""
    labels = labels.astype(np.int64, copy=False)
    max_id = int(labels.max())
    if max_id == 0:
        return labels.astype(np.int32), np.array([], dtype=np.int32)
    ids = np.unique(labels)
    ids = ids[ids != 0]
    mapper = np.zeros(max_id + 1, dtype=np.int64)
    mapper[ids] = np.arange(1, len(ids) + 1, dtype=np.int64)
    return mapper[labels].astype(np.int32), ids.astype(np.int32)

def _match_iou(y_true, y_pred, iou_thr=0.5):
    """
    1–1 greedy matching of instances by IoU.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted masks of identical shape. Each is passed
        through `_ensure_labeled` and `_compact_map` first.
    iou_thr : float
        Minimum IoU for a (true, pred) pair to count as a match. Pairs that
        do not overlap at all (IoU == 0) are never matched, whatever the
        threshold.

    Returns
    -------
    dict with keys
        tp, fp, fn : matched pairs, unmatched predictions, unmatched truths
        ious       : list of IoU values of the matched pairs
        n_true, n_pred : number of instances in each mask
    All keys are present on every return path.
    """
    t, p = _ensure_labeled(y_true), _ensure_labeled(y_pred)
    assert t.shape == p.shape, f"Shape mismatch {t.shape} vs {p.shape}"

    t_c, true_ids = _compact_map(t)
    p_c, pred_ids = _compact_map(p)

    T, P = len(true_ids), len(pred_ids)
    if T == 0 or P == 0:
        # Nothing can be matched: every instance on the non-empty side is an
        # error. Returning here also keeps argmax off an empty IoU matrix.
        return dict(tp=0, fp=P, fn=T, ious=[], n_true=T, n_pred=P)

    # int64 so the flattened pair index below cannot overflow int32.
    t_flat = t_c.ravel().astype(np.int64, copy=False)
    p_flat = p_c.ravel().astype(np.int64, copy=False)

    area_t = np.bincount(t_flat, minlength=T + 1)[1:]  # (T,)
    area_p = np.bincount(p_flat, minlength=P + 1)[1:]  # (P,)

    # Intersection via 2D histogram trick: each pixel votes for its
    # (true label, pred label) cell; row/column 0 (background) is dropped.
    inter = np.bincount(
        t_flat * (P + 1) + p_flat,
        minlength=(T + 1) * (P + 1)
    ).reshape(T + 1, P + 1)[1:, 1:]  # (T,P)

    union = area_t[:, None] + area_p[None, :] - inter
    # The maximum() guard keeps the denominator >= 1.
    iou = inter / np.maximum(union, 1)

    # Greedy 1–1 assignment: repeatedly take the best remaining pair and
    # retire its row and column. At most min(T, P) matches can exist, which
    # also bounds the loop for any threshold.
    iou_work = iou.copy()
    n_cols = iou_work.shape[1]
    matched_ious = []
    for _ in range(min(T, P)):
        i, j = divmod(int(np.argmax(iou_work)), n_cols)
        best = iou_work[i, j]
        if best < iou_thr or best <= 0:
            break
        matched_ious.append(float(best))
        iou_work[i, :] = -1
        iou_work[:, j] = -1

    tp = len(matched_ious)
    fp = P - tp
    fn = T - tp
    return dict(tp=tp, fp=fp, fn=fn, ious=matched_ious, n_true=T, n_pred=P)

# Resolve the (ground truth, prediction) file pairs once; later cells reuse PAIRS.
PAIRS = _pairs_from_dirs()
print("Pairs to evaluate:", [tuple(Path(f).name for f in pair) for pair in PAIRS])

Pairs to evaluate: [('mask_013_bacteria.tif', 'img_013_bacteria_masks.tif'), ('mask_014_bacteria.tif', 'img_014_bacteria_masks.tif'), ('mask_015_bacteria.tif', 'img_015_bacteria_masks.tif'), ('mask_016_bacteria.tif', 'img_016_bacteria_masks.tif'), ('mask_017_bacteria.tif', 'img_017_bacteria_masks.tif'), ('mask_018_bacteria.tif', 'img_018_bacteria_masks.tif'), ('mask_019_bacteria.tif', 'img_019_bacteria_masks.tif')]


1) Precision, Recall, F1 (at IoU ≥ 0.5 by default)


In [2]:
IOU_THR = 0.5  # change to 0.7 for stricter matching

# Counts are pooled over all images before the ratios are formed.
tot_tp = tot_fp = tot_fn = 0
per_image = []  # (GT file name, TP, FP, FN) for each image

for gtf, prf in PAIRS:
    gt = imageio.imread(gtf)
    pr = imageio.imread(prf)
    m = _match_iou(gt, pr, iou_thr=IOU_THR)
    tot_tp += m["tp"]
    tot_fp += m["fp"]
    tot_fn += m["fn"]
    per_image.append((Path(gtf).name, m["tp"], m["fp"], m["fn"]))

# With no predictions (or no ground truth) at all, the ratio is undefined and
# is reported as 1.0.
precision = tot_tp / (tot_tp + tot_fp) if (tot_tp + tot_fp) else 1.0
recall    = tot_tp / (tot_tp + tot_fn) if (tot_tp + tot_fn) else 1.0
# precision + recall is zero only when both are zero, i.e. there were errors
# but not a single true positive; the F1 for that case is 0, not 1.
f1        = (2*precision*recall) / (precision + recall) if (precision + recall) else 0.0

print(f"[IoU≥{IOU_THR}]  Precision={precision:.4f}  Recall={recall:.4f}  F1={f1:.4f}")
print("TP/FP/FN totals:", tot_tp, tot_fp, tot_fn)
# Optional: per-image TP/FP/FN
for name, tp, fp, fn in per_image:
    print(f"{name}: TP={tp} FP={fp} FN={fn}")

[IoU≥0.5]  Precision=0.8940  Recall=0.8539  F1=0.8735
TP/FP/FN totals: 953 113 163
mask_013_bacteria.tif: TP=202 FP=31 FN=44
mask_014_bacteria.tif: TP=117 FP=21 FN=33
mask_015_bacteria.tif: TP=46 FP=12 FN=15
mask_016_bacteria.tif: TP=249 FP=15 FN=24
mask_017_bacteria.tif: TP=72 FP=15 FN=18
mask_018_bacteria.tif: TP=125 FP=12 FN=13
mask_019_bacteria.tif: TP=142 FP=7 FN=16


2) Mean IoU of matched instances (same IoU threshold)

In [3]:
IOU_THR = 0.5  # keep equal to the threshold used for the detection metrics

# Pool the IoU of every matched (GT, prediction) instance pair across images.
all_ious = []
for gtf, prf in PAIRS:
    gt = imageio.imread(gtf)
    pr = imageio.imread(prf)
    m = _match_iou(gt, pr, iou_thr=IOU_THR)
    all_ious += m["ious"]

# No matched pair at all is reported as 0.0 rather than NaN.
if all_ious:
    mean_iou = float(np.mean(all_ious))
else:
    mean_iou = 0.0
print(f"[IoU≥{IOU_THR}]  Mean IoU of matched instances = {mean_iou:.4f}")
print(f"(Matched pairs counted: {len(all_ious)})")

[IoU≥0.5]  Mean IoU of matched instances = 0.7577
(Matched pairs counted: 953)


3) Counting error (absolute error in instance counts)

In [4]:
abs_errors = []
rows = []  # (GT file name, #GT instances, #predicted instances, |difference|)
for gtf, prf in PAIRS:
    gt = imageio.imread(gtf)
    pr = imageio.imread(prf)
    # Counting only needs the number of distinct instance ids in each mask,
    # so the IoU matching is skipped entirely.
    n_true = len(_compact_map(_ensure_labeled(gt))[1])
    n_pred = len(_compact_map(_ensure_labeled(pr))[1])
    err = abs(n_true - n_pred)
    abs_errors.append(err)
    rows.append((Path(gtf).name, n_true, n_pred, err))

mean_abs_err = float(np.mean(abs_errors)) if abs_errors else 0.0
print(f"Counting error (|#pred - #gt|): mean = {mean_abs_err:.3f}")
for name, n_t, n_p, e in rows:
    print(f"{name}: GT={n_t}  Pred={n_p}  |Δ|={e}")

Counting error (|#pred - #gt|): mean = 7.143
mask_013_bacteria.tif: GT=246  Pred=233  |Δ|=13
mask_014_bacteria.tif: GT=150  Pred=138  |Δ|=12
mask_015_bacteria.tif: GT=61  Pred=58  |Δ|=3
mask_016_bacteria.tif: GT=273  Pred=264  |Δ|=9
mask_017_bacteria.tif: GT=90  Pred=87  |Δ|=3
mask_018_bacteria.tif: GT=138  Pred=137  |Δ|=1
mask_019_bacteria.tif: GT=158  Pred=149  |Δ|=9


In [5]:
import os, re
from pathlib import Path
import numpy as np
import imageio.v2 as imageio
from glob import glob
from skimage.measure import label as cc_label  # only used if a mask is binary

# ---- Paths ----
# Ground-truth masks and the predicted masks that are scored against them.
GT_DIR   = "/mnt/efs/aimbl_2025/student_data/S-LS/validation_mask_bacteria"
PRED_DIR = "/mnt/efs/aimbl_2025/student_data/S-LS/stardist/beforetraining_stardist_masks_tif"

# Evaluate only these IDs
ALLOWED_IDS = {13, 14, 15, 16, 17, 18, 19}

# File-name patterns; group 1 captures the numeric image ID.
gt_pat   = re.compile(r"^mask_(\d+)_bacteria\.tif$")
# Single underscore before "bt", matching the names the pairing code builds
# (img_NNN_bacteria_bt_masks.tif).
pred_pat = re.compile(r"^img_(\d+)_bacteria_bt_masks\.tif$")

def _pairs_from_dirs():
    """Collect (gt_path, pred_path) tuples for ground-truth masks with an allowed ID.

    Ground-truth files are taken from GT_DIR in sorted order. For each one
    whose numeric ID is in ALLOWED_IDS, the prediction is expected in
    PRED_DIR under the name ``img_<ID, zero-padded to 3 digits>_bacteria_bt_masks.tif``.

    Raises:
        FileNotFoundError: a selected ground-truth mask has no prediction file.
        AssertionError: no pair was found at all.
    """
    gt_files = sorted(glob(os.path.join(GT_DIR, "mask_*_bacteria.tif")))
    pairs = []
    for gt_file in gt_files:
        gt_name = Path(gt_file).name
        hit = gt_pat.match(gt_name)
        if hit is None:
            continue
        image_id = int(hit.group(1))
        if image_id not in ALLOWED_IDS:
            continue
        pred_name = f"img_{image_id:03d}_bacteria_bt_masks.tif"
        pred_file = os.path.join(PRED_DIR, pred_name)
        if os.path.exists(pred_file):
            pairs.append((gt_file, pred_file))
        else:
            raise FileNotFoundError(f"Missing prediction for GT {gt_name}: expected {pred_name}")
    assert pairs, "No (GT, PRED) pairs found — check paths and filenames."
    return pairs

def _ensure_labeled(arr):
    """
    Ensure an instance-labeled array: 0=background, 1..K=instances.
    If the array is binary (only 0/1), perform connected-components labeling.
    """
    arr = np.asarray(arr)
    uniq = np.unique(arr)
    if set(uniq.tolist()) <= {0,1}:  # binary -> label CCs
        return cc_label(arr.astype(np.uint8), connectivity=1)
    return arr.astype(np.int32)

def _compact_map(labels):
    """Map arbitrary ids to 0..K, keep 0 as background."""
    labels = labels.astype(np.int64, copy=False)
    max_id = int(labels.max())
    if max_id == 0:
        return labels.astype(np.int32), np.array([], dtype=np.int32)
    ids = np.unique(labels)
    ids = ids[ids != 0]
    mapper = np.zeros(max_id + 1, dtype=np.int64)
    mapper[ids] = np.arange(1, len(ids) + 1, dtype=np.int64)
    return mapper[labels].astype(np.int32), ids.astype(np.int32)

def _match_iou(y_true, y_pred, iou_thr=0.5):
    """
    1–1 greedy matching of instances by IoU.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted masks of identical shape. Each is passed
        through `_ensure_labeled` and `_compact_map` first.
    iou_thr : float
        Minimum IoU for a (true, pred) pair to count as a match. Pairs that
        do not overlap at all (IoU == 0) are never matched, whatever the
        threshold.

    Returns
    -------
    dict with keys
        tp, fp, fn : matched pairs, unmatched predictions, unmatched truths
        ious       : list of IoU values of the matched pairs
        n_true, n_pred : number of instances in each mask
    All keys are present on every return path.
    """
    t, p = _ensure_labeled(y_true), _ensure_labeled(y_pred)
    assert t.shape == p.shape, f"Shape mismatch {t.shape} vs {p.shape}"

    t_c, true_ids = _compact_map(t)
    p_c, pred_ids = _compact_map(p)

    T, P = len(true_ids), len(pred_ids)
    if T == 0 or P == 0:
        # Nothing can be matched: every instance on the non-empty side is an
        # error. Returning here also keeps argmax off an empty IoU matrix.
        return dict(tp=0, fp=P, fn=T, ious=[], n_true=T, n_pred=P)

    # int64 so the flattened pair index below cannot overflow int32.
    t_flat = t_c.ravel().astype(np.int64, copy=False)
    p_flat = p_c.ravel().astype(np.int64, copy=False)

    area_t = np.bincount(t_flat, minlength=T + 1)[1:]  # (T,)
    area_p = np.bincount(p_flat, minlength=P + 1)[1:]  # (P,)

    # Intersection via 2D histogram trick: each pixel votes for its
    # (true label, pred label) cell; row/column 0 (background) is dropped.
    inter = np.bincount(
        t_flat * (P + 1) + p_flat,
        minlength=(T + 1) * (P + 1)
    ).reshape(T + 1, P + 1)[1:, 1:]  # (T,P)

    union = area_t[:, None] + area_p[None, :] - inter
    # The maximum() guard keeps the denominator >= 1.
    iou = inter / np.maximum(union, 1)

    # Greedy 1–1 assignment: repeatedly take the best remaining pair and
    # retire its row and column. At most min(T, P) matches can exist, which
    # also bounds the loop for any threshold.
    iou_work = iou.copy()
    n_cols = iou_work.shape[1]
    matched_ious = []
    for _ in range(min(T, P)):
        i, j = divmod(int(np.argmax(iou_work)), n_cols)
        best = iou_work[i, j]
        if best < iou_thr or best <= 0:
            break
        matched_ious.append(float(best))
        iou_work[i, :] = -1
        iou_work[:, j] = -1

    tp = len(matched_ious)
    fp = P - tp
    fn = T - tp
    return dict(tp=tp, fp=fp, fn=fn, ious=matched_ious, n_true=T, n_pred=P)

# Resolve the (ground truth, prediction) file pairs once; later cells reuse PAIRS.
PAIRS = _pairs_from_dirs()
print("Pairs to evaluate Before training:", [tuple(Path(f).name for f in pair) for pair in PAIRS])

Pairs to evaluate Before training: [('mask_013_bacteria.tif', 'img_013_bacteria_bt_masks.tif'), ('mask_014_bacteria.tif', 'img_014_bacteria_bt_masks.tif'), ('mask_015_bacteria.tif', 'img_015_bacteria_bt_masks.tif'), ('mask_016_bacteria.tif', 'img_016_bacteria_bt_masks.tif'), ('mask_017_bacteria.tif', 'img_017_bacteria_bt_masks.tif'), ('mask_018_bacteria.tif', 'img_018_bacteria_bt_masks.tif'), ('mask_019_bacteria.tif', 'img_019_bacteria_bt_masks.tif')]


In [6]:
IOU_THR = 0.5  # change to 0.7 for stricter matching

# Counts are pooled over all images before the ratios are formed.
tot_tp = tot_fp = tot_fn = 0
per_image = []  # (GT file name, TP, FP, FN) for each image

for gtf, prf in PAIRS:
    gt = imageio.imread(gtf)
    pr = imageio.imread(prf)
    m = _match_iou(gt, pr, iou_thr=IOU_THR)
    tot_tp += m["tp"]
    tot_fp += m["fp"]
    tot_fn += m["fn"]
    per_image.append((Path(gtf).name, m["tp"], m["fp"], m["fn"]))

# With no predictions (or no ground truth) at all, the ratio is undefined and
# is reported as 1.0.
precision = tot_tp / (tot_tp + tot_fp) if (tot_tp + tot_fp) else 1.0
recall    = tot_tp / (tot_tp + tot_fn) if (tot_tp + tot_fn) else 1.0
# precision + recall is zero only when both are zero, i.e. there were errors
# but not a single true positive; the F1 for that case is 0, not 1.
f1        = (2*precision*recall) / (precision + recall) if (precision + recall) else 0.0

print(f"Before training [IoU≥{IOU_THR}]  Precision={precision:.4f}  Recall={recall:.4f}  F1={f1:.4f}")
print("TP/FP/FN totals:", tot_tp, tot_fp, tot_fn)
# Optional: per-image TP/FP/FN
for name, tp, fp, fn in per_image:
    print(f"{name}: TP={tp} FP={fp} FN={fn}")

Before training [IoU≥0.5]  Precision=0.7379  Recall=0.7796  F1=0.7582
TP/FP/FN totals: 870 309 246
mask_013_bacteria.tif: TP=183 FP=83 FN=63
mask_014_bacteria.tif: TP=110 FP=44 FN=40
mask_015_bacteria.tif: TP=35 FP=40 FN=26
mask_016_bacteria.tif: TP=240 FP=23 FN=33
mask_017_bacteria.tif: TP=62 FP=40 FN=28
mask_018_bacteria.tif: TP=118 FP=47 FN=20
mask_019_bacteria.tif: TP=122 FP=32 FN=36


In [7]:
IOU_THR = 0.5  # keep equal to the threshold used for the detection metrics

# Pool the IoU of every matched (GT, prediction) instance pair across images.
all_ious = []
for gtf, prf in PAIRS:
    gt = imageio.imread(gtf)
    pr = imageio.imread(prf)
    m = _match_iou(gt, pr, iou_thr=IOU_THR)
    all_ious += m["ious"]

# No matched pair at all is reported as 0.0 rather than NaN.
if all_ious:
    mean_iou = float(np.mean(all_ious))
else:
    mean_iou = 0.0
print(f"Before training [IoU≥{IOU_THR}]  Mean IoU of matched instances = {mean_iou:.4f}")
print(f"(Matched pairs counted: {len(all_ious)})")

Before training [IoU≥0.5]  Mean IoU of matched instances = 0.7523
(Matched pairs counted: 870)


In [8]:
abs_errors = []
rows = []  # (GT file name, #GT instances, #predicted instances, |difference|)
for gtf, prf in PAIRS:
    gt = imageio.imread(gtf)
    pr = imageio.imread(prf)
    # Counting only needs the number of distinct instance ids in each mask,
    # so the IoU matching is skipped entirely.
    n_true = len(_compact_map(_ensure_labeled(gt))[1])
    n_pred = len(_compact_map(_ensure_labeled(pr))[1])
    err = abs(n_true - n_pred)
    abs_errors.append(err)
    rows.append((Path(gtf).name, n_true, n_pred, err))

mean_abs_err = float(np.mean(abs_errors)) if abs_errors else 0.0
print(f"Before training, Counting error (|#pred - #gt|): mean = {mean_abs_err:.3f}")
for name, n_t, n_p, e in rows:
    print(f"{name}: GT={n_t}  Pred={n_p}  |Δ|={e}")

Before training, Counting error (|#pred - #gt|): mean = 13.000
mask_013_bacteria.tif: GT=246  Pred=266  |Δ|=20
mask_014_bacteria.tif: GT=150  Pred=154  |Δ|=4
mask_015_bacteria.tif: GT=61  Pred=75  |Δ|=14
mask_016_bacteria.tif: GT=273  Pred=263  |Δ|=10
mask_017_bacteria.tif: GT=90  Pred=102  |Δ|=12
mask_018_bacteria.tif: GT=138  Pred=165  |Δ|=27
mask_019_bacteria.tif: GT=158  Pred=154  |Δ|=4


Your model got better at detecting the right objects (precision, recall, and F1 went up; mean counting error dropped from 13.0 to about 7.1), but the shape/overlap quality of each correctly detected object stayed about the same (mean IoU of matched instances ≈ 0.75 both before and after training). That’s very common in instance segmentation.

Here’s what’s going on and what to check:

Why mean IoU stayed flat

Different aspects of quality:

Precision/Recall/F1 measure detection—did you find each GT object and avoid spurious ones?

Mean IoU (over matched pairs) measures overlap/shape accuracy of the objects you did detect.

Training improved detection, not boundaries: Your model likely reduced FPs and FNs (fewer missed or extra instances), but the boundary predictions for matched objects stayed similar → mean IoU barely moves.

Mean IoU only looks at matched pairs: If before/after are both matching roughly the same kinds of objects, their average IoU can remain stable even while you detect more total objects.

Ceilings from data/geometry: IoU is often capped by annotation noise, pixel resolution, or the model’s shape family (e.g., StarDist’s star-convex polygons). Those limits keep IoU from rising much without architectural or labeling changes.