# LBM-CaImAn Pipeline Demo

calcium imaging segmentation pipeline using CaImAn CNMF on light-beads microscopy data.

sections:
1. load data and inspect metadata
2. configure CNMF parameters (neuron size, patch grid, quality thresholds)
3. run pipeline
4. run component evaluation on existing results
5. filter and visualize accepted neurons

In [None]:
# lcp supplies default_ops(), pipeline() and load_planar_results() used below.
import lbm_caiman_python as lcp
# imread returns an array-like object exposing .shape, .dtype and .metadata.
from mbo_utilities import imread
import numpy as np
from pathlib import Path

# Record the package version alongside the outputs of this run.
print(f"lbm_caiman_python version: {lcp.__version__}")

## 1. Load data

In [None]:
# Locations of the raw data and of the segmentation output.
input_path = Path(r"E:\datasets\lbm\jeff_lbm\caiman")
output_path = Path(r"E:\datasets\lbm\jeff_lbm\segmentation-v3")

# Load the dataset and keep its metadata mapping for the parameter cell.
data = imread(input_path)
meta = data.metadata

# The parameter cell reads the frame rate from meta["frame_rate"], so report
# that key first and only fall back to the short "fr" alias. Otherwise this
# summary can print "N/A" for a value the pipeline actually finds.
frame_rate_hz = meta.get("frame_rate", meta.get("fr", "N/A"))

print(f"Shape: {data.shape}, dtype: {data.dtype}")
print(f"Frame rate: {frame_rate_hz} Hz")
print(f"Pixel size: dx={meta.get('dx', 'N/A')} µm, dy={meta.get('dy', 'N/A')} µm")
print(f"Num planes: {meta.get('num_planes', 'N/A')}")

## 2. Configure parameters

key parameters:
- **gSig, gSiz**: neuron half-width and bounding box in pixels, derived from 10-20 µm target and pixel size
- **rf, stride, K**: patch grid. rf=half-patch-size in pixels, stride=overlap between neighbouring patches in pixels, K=components per patch.
  K is computed from an assumed neuron density (92k/mm³) scaled to a single z-slice.
  A previous run used rf=20 (40 px patches) with K=130, which created grid artifacts:
  that is one component per ~12 px² (a ~3.5 px spacing), regardless of whether a neuron was there.
- **merge_thresh**: spatial correlation threshold for merging duplicate components across patches.
  note: the caiman parameter name is `merge_thresh`, not `merge_thr`.
- **min_SNR, rval_thr**: quality thresholds for evaluate_components (run automatically after CNMF)

In [None]:
# Start from the package defaults and override what depends on this dataset.
ops = lcp.default_ops()

# data metadata
ops["fr"] = meta["frame_rate"]
ops["dxy"] = (meta["dx"], meta["dy"])
ops["decay_time"] = 0.7

# neuron size from 10-20 µm target
# NOTE(review): gSig/gSiz are derived from dx only, i.e. the footprint is
# assumed to be square in pixel space — confirm for anisotropic pixels.
dx, dy = meta["dx"], meta["dy"]
neuron_min_um, neuron_max_um = 10, 20
# Clamp to 1 px so very coarse pixels cannot round the half-width down to 0.
half_neuron_px = max(1, int(round((neuron_min_um / 2) / dx)))
gSiz_px = int(round(neuron_max_um / dx)) + 1
ops["gSig"] = (half_neuron_px, half_neuron_px)
ops["gSiz"] = (gSiz_px, gSiz_px)

# patch grid — K derived from density
patch_side = 80  # pixels per patch side (rf = patch_side // 2)
# Physical patch area uses both pixel pitches, so K stays correct when
# dx != dy (identical to the dx-only formula for square pixels).
patch_area_mm2 = (patch_side * dx * 1e-3) * (patch_side * dy * 1e-3)
neurons_per_patch = int(92000 * patch_area_mm2 * 0.05)  # 92k/mm³, ~50 µm z-slice
ops["rf"] = patch_side // 2
ops["stride"] = patch_side // 4
# Head-room of 10 over the density estimate, never fewer than 30 per patch.
ops["K"] = max(neurons_per_patch + 10, 30)
ops["nb"] = 1

# merging and quality
ops["merge_thresh"] = 0.85
ops["min_SNR"] = 3.0
ops["rval_thr"] = 0.8
ops["p"] = 1
ops["ssub"] = 1
ops["tsub"] = 1
ops["do_motion_correction"] = False

# Echo the derived values so they can be checked before the long run.
print(f"Pixel size: {dx:.2f} µm/px")
print(f"gSig={ops['gSig']} px, gSiz={ops['gSiz']} px")
print(f"Patch: {patch_side}x{patch_side} px, stride={ops['stride']}, K={ops['K']}")
print(f"Expected ~{neurons_per_patch} neurons/patch")
print(f"Quality: min_SNR={ops['min_SNR']}, rval_thr={ops['rval_thr']}")

## 3. Run pipeline

In [None]:
# Collect the call arguments first so the run configuration reads as one unit.
pipeline_kwargs = {
    "input_data": input_path,
    "save_path": output_path,
    "ops": ops,
    "planes": [1],
    # Both force flags are off for this run.
    "force_mcorr": False,
    "force_cnmf": False,
}
results = lcp.pipeline(**pipeline_kwargs)

# Summarise what came back, one entry per line.
n_processed = len(results)
print(f"\nProcessed {n_processed} plane(s)")
for r in results:
    print(f"  {r}")

In [None]:
# inspect results
if results:
    # The first pipeline result's parent directory holds the planar outputs.
    plane_dir = results[0].parent
    # Keep the loaded results under their own name so that `data` keeps
    # referring to the movie returned by imread() in the loading cell.
    planar = lcp.load_planar_results(plane_dir)
    ops_saved = planar["ops"]

    n_accepted = ops_saved.get('n_cells', '?')
    # Fall back to the accepted count when no separate total was stored.
    n_total = ops_saved.get('n_cells_total', n_accepted)

    print(f"Plane: {ops_saved.get('plane', '?')}")
    print(f"Total components: {n_total}")
    print(f"Accepted components: {n_accepted}")
    print(f"FOV: {ops_saved.get('Ly', '?')} x {ops_saved.get('Lx', '?')}")
    print(f"Frames: {ops_saved.get('nframes', '?')}")

## 4. Component evaluation on existing results

if the pipeline already ran, load saved estimates and run CaImAn's
`evaluate_components` to compute r_values (spatial correlation) and
SNR_comp (signal-to-noise) for each component.

important: CaImAn's evaluate_components has a bug in its non-memmap
code path (assumes d1,d2,T axis order but movie is T,d1,d2). we
always load through a CaImAn-format mmap file to avoid this.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from scipy import sparse
import caiman as cm
from caiman.source_extraction.cnmf.estimates import Estimates
from caiman.source_extraction.cnmf.params import CNMFParams

# load saved estimates
plane_dir = Path(r"E:\datasets\lbm\jeff_lbm\segmentation-v3\zplane01_tp00001-00500")
data = lcp.load_planar_results(plane_dir)
ops_data = data["ops"]
estimates_dict = data["estimates"]

A = estimates_dict["A"]
C = estimates_dict["C"]
Ly = ops_data.get("Ly", 1002)
Lx = ops_data.get("Lx", 725)
print(f"Raw components: {A.shape[1]}")
print(f"FOV: {Ly} x {Lx}")

# reconstruct caiman Estimates object
est = Estimates(A=A, C=C,
                b=estimates_dict.get("b"),
                f=estimates_dict.get("f"))
if estimates_dict.get("S") is not None:
    est.S = estimates_dict["S"]
if estimates_dict.get("YrA") is not None:
    est.YrA = estimates_dict["YrA"]

print("Estimates object reconstructed.")

In [None]:
# create or reuse caiman-format mmap from movie
raw = imread(input_path)
# Index 0 on the second axis selects the plane; the result is cast to float32
# to match the dtype of the memmap written below.
movie = np.asarray(raw[:, 0, :, :]).squeeze().astype(np.float32)
T, d1, d2 = movie.shape
print(f"Movie: {movie.shape}")

# cm.load_memmap() takes the on-disk layout from the "order_X" token of the
# file name. The buffer below is written in Fortran order, so the name must say
# order_F; labelling it order_C makes load_memmap map the same bytes as a
# C-ordered (pixels, T) array and returns a scrambled movie. The corrected name
# also keeps files written under the old, mislabelled name from being reused.
mmap_path = plane_dir / f"Yr_d1_{d1}_d2_{d2}_d3_1_order_F_frames_{T}_.mmap"

if not mmap_path.exists():
    print("Writing mmap...")
    fp = np.memmap(str(mmap_path), mode='w+', dtype=np.float32,
                   shape=(d1 * d2, T), order='F')
    # One column per frame; pixels are flattened column-major to match the
    # order='F' reshape applied after loading.
    for t, frame in enumerate(movie):
        fp[:, t] = frame.ravel(order='F')
    # Push the data to disk before the file is reopened read-only.
    fp.flush()
    del fp
    print(f"Created: {mmap_path.name}")
else:
    print(f"Reusing: {mmap_path.name}")

# Reload through CaImAn so `images` is a memmap-backed (T, d1, d2) view.
Yr, dims, T_mm = cm.load_memmap(str(mmap_path))
images = np.reshape(Yr.T, [T_mm] + list(dims), order='F')
print(f"Memmap: shape={images.shape}, type={type(images).__name__}")

In [None]:
# run evaluate_components to compute per-component metrics.
# thresholds are kept low here — actual filtering uses AND logic below.
# caiman's built-in uses OR logic (good rval OR good SNR → accept)
# which is too permissive for dense data.
params = CNMFParams()
params.quality.update({
    'use_cnn': False,
    'min_SNR': 1.0,
    'rval_thr': 0.1,
    'rval_lowest': -1,
    'min_SNR_reject': 0.0,
})
# Take the timing parameters from the ops saved with the results so the
# evaluation matches the run that produced them; the fallbacks are the values
# this notebook used when they were hard-coded.
params.data['decay_time'] = ops_data.get("decay_time", 0.7)
params.data['fr'] = ops_data.get("fr", 10.0)
# NOTE(review): gSig presumably only matters for the CNN classifier, which is
# disabled above — confirm before relying on this fixed value.
params.init['gSig'] = (3, 3)

print("Running evaluate_components...")
est.evaluate_components(images, params, dview=None)

# Summary of the two metrics used by the AND filter in the next section.
print(f"r_values: median={np.median(est.r_values):.3f}, "
      f"range=[{est.r_values.min():.3f}, {est.r_values.max():.3f}]")
print(f"SNR_comp: median={np.median(est.SNR_comp):.3f}, "
      f"range=[{est.SNR_comp.min():.3f}, {est.SNR_comp.max():.3f}]")
print(f"CaImAn default (OR logic): {len(est.idx_components)} accepted")

## 5. Filter and visualize

strict AND filtering: a component must pass ALL of:
- `r_values >= rval_min` (spatial correlation with movie)
- `SNR_comp >= snr_min` (signal-to-noise ratio)
- footprint area (number of non-zero pixels) between the areas of circles with diameters `mn_um` and `mx_um` µm (converted to pixels via the pixel size)

adjust thresholds and re-run this cell to tune.

In [None]:
# --- thresholds (adjust and re-run this cell) ---
rval_min = 0.8
snr_min = 3.0
mn_um, mx_um = 10, 20  # neuron diameter range in µm

# convert µm to pixel area
# A circle of diameter d µm covers pi * (d / 2)**2 µm²; dividing by the area
# of one pixel (dx * dy) gives its pixel count. Using both pitches keeps the
# bounds right when dx != dy and equals the dx-only formula for square pixels.
dx, dy = meta["dx"], meta["dy"]
pixel_area_um2 = dx * dy
mn_px = np.pi * (mn_um / 2) ** 2 / pixel_area_um2
mx_px = np.pi * (mx_um / 2) ** 2 / pixel_area_um2

# component sizes from sparse A
# Number of strictly positive entries in each column of A.
sizes = np.array((A > 0).sum(axis=0)).ravel()

# Individual criteria are kept separate so the per-criterion counts printed
# below reuse exactly the masks that form the combined filter.
pass_rval = est.r_values >= rval_min
pass_snr = est.SNR_comp >= snr_min
pass_size = (sizes >= mn_px) & (sizes <= mx_px)

good = pass_rval & pass_snr & pass_size
idx_good = np.where(good)[0]
n_pixels = Ly * Lx

print(f"Pixel size: {dx:.2f} x {dy:.2f} µm/px")
print(f"Size: {mn_um}-{mx_um} µm → {mn_px:.0f}-{mx_px:.0f} px")
print(f"rval_min={rval_min}, snr_min={snr_min}")
print()
print(f"AND filtering: {A.shape[1]} → {len(idx_good)} components")
print(f"  rval >= {rval_min}: {pass_rval.sum()}")
print(f"  SNR  >= {snr_min}:  {pass_snr.sum()}")
print(f"  size {mn_px:.0f}-{mx_px:.0f} px: {pass_size.sum()}")
print(f"  ALL:         {len(idx_good)}")

In [None]:
# spatial footprint maps
A_good = A[:, idx_good]
if len(idx_good) > 0:
    # Per pixel: the strongest footprint weight and the column that owns it.
    footprint = np.array(A_good.max(axis=1).todense()).ravel().reshape((Ly, Lx), order="F")
    comp_ids = np.array(A_good.argmax(axis=1)).ravel().reshape((Ly, Lx), order="F")
else:
    # A sparse max over zero columns raises, so fall back to empty maps when
    # the filter rejected every component.
    print("No components passed the filter — maps will be empty.")
    footprint = np.zeros((Ly, Lx), dtype=np.float32)
    comp_ids = np.zeros((Ly, Lx), dtype=int)
mask = footprint > 0
# Cycle component ids through 20 values; uncovered pixels become NaN.
cell_map = np.where(mask, comp_ids % 20, np.nan)

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
axes[0].imshow(footprint, cmap="hot", aspect="auto")
axes[0].set_title(f"max projection — {len(idx_good)} accepted")
axes[0].axis("off")
# Fixed colour limits give every id in 0..19 its own tab20 colour even when
# few components are shown, and avoid autoscaling an all-NaN image.
axes[1].imshow(cell_map, cmap="tab20", aspect="auto", interpolation="nearest",
               vmin=0, vmax=19)
axes[1].set_title(f"cell map — {len(idx_good)} accepted")
axes[1].axis("off")
plt.tight_layout()
plt.show()
print(f"Coverage: {mask.sum()} / {n_pixels} pixels ({100*mask.sum()/n_pixels:.1f}%)")

In [None]:
# metric distributions with threshold lines
fig, axes = plt.subplots(1, 3, figsize=(16, 4))
ax_rval, ax_snr, ax_size = axes

# Look shared by the three histograms and by every threshold marker.
hist_style = {"bins": 100, "color": "gray", "alpha": 0.7}
line_style = {"color": "r", "ls": "--"}

# spatial correlation
ax_rval.hist(est.r_values, **hist_style)
ax_rval.axvline(rval_min, label=f"rval_min={rval_min}", **line_style)
ax_rval.set_xlabel("r_values")
ax_rval.set_title("spatial correlation")
ax_rval.legend()

# signal-to-noise, clipped to [0, 30] for display
snr_clipped = np.clip(est.SNR_comp, 0, 30)
ax_snr.hist(snr_clipped, **hist_style)
ax_snr.axvline(snr_min, label=f"snr_min={snr_min}", **line_style)
ax_snr.set_xlabel("SNR_comp")
ax_snr.set_title("signal-to-noise")
ax_snr.legend()

# component size, with lower and upper bounds marked
ax_size.hist(sizes, range=(0, 500), **hist_style)
ax_size.axvline(mn_px, label=f"{mn_um}µm={mn_px:.0f}px", **line_style)
ax_size.axvline(mx_px, label=f"{mx_um}µm={mx_px:.0f}px", **line_style)
ax_size.set_xlabel("size (pixels)")
ax_size.set_title("component size")
ax_size.legend()

plt.tight_layout()
plt.show()

In [None]:
# sample traces from top accepted components
if len(idx_good) > 0:
    # Rank the accepted components by SNR, highest first, and keep up to 10.
    snr_good = est.SNR_comp[idx_good]
    n_show = min(10, len(idx_good))
    descending = np.argsort(snr_good)[::-1]
    top_idx = descending[:n_show]

    fig, axes = plt.subplots(n_show, 1, figsize=(14, 2 * n_show), sharex=True)
    # A single subplot comes back as a bare Axes; wrap it so it can be iterated.
    axes = axes if n_show != 1 else [axes]

    for ax, ci in zip(axes, top_idx):
        # ci indexes the accepted subset, gi the full component list.
        gi = idx_good[ci]
        ax.plot(C[gi], "k", linewidth=0.5)
        ax.set_ylabel(f"SNR={snr_good[ci]:.1f}\nr={est.r_values[gi]:.2f}", fontsize=8)
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)

    axes[-1].set_xlabel("Frame")
    fig.suptitle(f"Top {n_show} accepted components (by SNR)")
    plt.tight_layout()
    plt.show()