Show some scales
====
Here are some illustrative examples of the data we're working with.

In [None]:
"""
Read the metadata
"""
import pathlib
import numpy as np

from scale_morphology.scales import util
from scale_morphology.scales import metadata

# Directory under the configured "rdsf_mount" that holds the segmentation TIFs
parent_dir = (
    pathlib.Path(util.config()["rdsf_mount"]) / "Carran/Postgrad/Scale images from WT_spp1_sost/TIFs/segmentations"
).expanduser()
assert parent_dir.exists(), f"Segmentation directory not found: {parent_dir}"

# Sorted so that the ordering is deterministic from run to run
segmentation_paths = sorted(str(x) for x in parent_dir.glob("*.tif"))

# The paths are already strings, so they are passed through without converting again
df = metadata.df(segmentation_paths).drop(columns="no_scale")
assert len(df) == 928, "Did the number of scales change?"

In [None]:
# The cleaned segmentations live three levels above `parent_dir`, under the
# same file names as the raw segmentations
clean_seg_dir = parent_dir.parents[2] / "segmentations_cleaned"
clean_seg_paths = [clean_seg_dir / pathlib.Path(p).name for p in segmentation_paths]

# Check they all exist, reporting every missing file at once instead of
# failing on the first one with no indication of which it was
_missing = [str(p) for p in clean_seg_paths if not p.exists()]
assert not _missing, f"Missing cleaned segmentations: {_missing}"

In [None]:
"""
Show some illustrative examples
"""

import textwrap
import tifffile
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 4, figsize=(16, 4))

# Choose some scales to plot
scale_indices = [0, 50, 100, 500]
example_paths = (df["path"].iloc[idx] for idx in scale_indices)

# One panel per chosen row of the metadata table
for ax, example_path in zip(axes, example_paths, strict=False):
    image = tifffile.imread(example_path)
    ax.imshow(image, cmap="binary")

    # Title each panel with the file stem, wrapped so long names fit
    title_lines = textwrap.wrap(pathlib.Path(example_path).stem, width=20)
    ax.set_title("\n".join(title_lines))

    # Pixel coordinates aren't informative here, so drop the ticks
    ax.set_xticks([])
    ax.set_yticks([])

Carran's Segmentations
----
We used an AI model to perform the segmentation, but sometimes this didn't work properly.

Carran went through the segmentations, manually correcting the ones that didn't look right. We can plot the differences:

In [None]:
"""
Read in pairs of the clean/raw segmentations, filling holes/removing small objects from the cleaned segmentation.

Keep track of which ones differ from the clean segmentation
"""

from concurrent.futures import ThreadPoolExecutor

import tifffile
import numpy as np
from tqdm.notebook import tqdm
from skimage.measure import euler_number
from scipy.ndimage import binary_fill_holes

from scale_morphology.scales.segmentation import largest_connected_component

# The "path" column of the metadata table; zipped against the raw and cleaned
# segmentation paths below
scale_paths = df["path"]


def load_scale_data(paths):
    """
    Load one raw/cleaned segmentation pair and post-process the cleaned mask.

    Takes (i, (scale_path, raw_seg_path, clean_seg_path)); only the two
    segmentation paths are actually read.

    Returns (cleaned_by_carran, fixed_in_post, raw_seg, carran_seg, clean_seg):
      - cleaned_by_carran: whether any pixel differs between the cleaned mask
        and the rescaled raw mask
      - fixed_in_post: whether the cleaned mask's Euler number was not 1, in
        which case the post-processing below was applied
      - raw_seg: the raw mask multiplied by 255 and cast to uint8
      - carran_seg: the cleaned mask exactly as read from disk
      - clean_seg: the post-processed mask (carran_seg itself if no
        post-processing was needed)

    Raises ValueError if the post-processed mask's Euler number is still not 1.
    """
    _, (_, raw_seg_path, clean_seg_path) = paths

    carran_seg = tifffile.imread(clean_seg_path)
    raw_seg = (tifffile.imread(raw_seg_path) * 255).astype(np.uint8)

    cleaned_by_carran = (carran_seg != raw_seg).any()
    fixed_in_post = euler_number(carran_seg) != 1

    # Nothing to repair: the cleaned mask is used as-is
    if not fixed_in_post:
        return cleaned_by_carran, fixed_in_post, raw_seg, carran_seg, carran_seg

    # Fill holes, then remove small objects
    filled = binary_fill_holes(carran_seg)
    clean_seg = (largest_connected_component(filled) * 255).astype(np.uint8)

    # It's possible we might have removed everything, so just make sure we haven't here
    if euler_number(clean_seg) != 1:
        raise ValueError(f"Got {euler_number(clean_seg)=}")

    return cleaned_by_carran, fixed_in_post, raw_seg, carran_seg, clean_seg


# Load every (scale, raw segmentation, cleaned segmentation) triple on a thread pool
with ThreadPoolExecutor(max_workers=32) as executor:
    _jobs = enumerate(zip(scale_paths, segmentation_paths, clean_seg_paths))
    _loaded = executor.map(load_scale_data, _jobs)

    # Drain the iterator while the pool is still open; tqdm reports progress
    # as each result is yielded
    results = list(tqdm(_loaded, total=len(scale_paths)))

In [None]:
# Each entry of `results` is the 5-tuple returned by load_scale_data:
# (cleaned_by_carran, fixed_in_post, raw_seg, carran_seg, clean_seg).
# Pull each position out into its own array.

# Which segmentations were edited by Carran and which segmentations we needed
# to fill holes/remove small objects from
cleaned_by_carran = np.array([entry[0] for entry in results])
fixed_in_post = np.array([entry[1] for entry in results])

# The masks themselves, stacked along a new leading axis
raw_segs = np.array([entry[2] for entry in results])
carran_segs = np.array([entry[3] for entry in results])
clean_segs = np.array([entry[4] for entry in results])

In [None]:
"""
Show the masks that Carran edited
"""

import textwrap
import matplotlib.pyplot as plt
from matplotlib import colors


def clear_seismic() -> colors.Colormap:
    """
    Colormap that varies from blue to red via transparent

    Returns a three-entry ``ListedColormap`` named "clear2seismic" whose
    entries are, in order: opaque blue, fully transparent white, opaque red.
    """
    # Module-level `to_rgba` replaces the legacy `colorConverter` instance
    c_blue = colors.to_rgba("blue")
    c_clear = colors.to_rgba("white", alpha=0)
    c_red = colors.to_rgba("red")
    return colors.ListedColormap([c_blue, c_clear, c_red], name="clear2seismic")


fig, axes = plt.subplots(10, 10, figsize=(24, 24))

# Switch the frames off for every panel up front, so that panels left over
# when there are fewer edited masks than grid cells stay blank
for axis in axes.flat:
    axis.set_axis_off()

# The colormap is the same for every panel, so build it once
diff_cmap = clear_seismic()

# NOTE: zip stops at the shortest input, so at most 10 * 10 masks are drawn
for axis, path, _raw, _cleaned in zip(
    axes.flat,
    df["path"][cleaned_by_carran],
    raw_segs[cleaned_by_carran],
    carran_segs[cleaned_by_carran],
):
    axis.imshow(_raw, alpha=0.5)

    # Signed difference between the raw mask and Carran's mask; cast to int
    # first so the subtraction cannot wrap around in uint8
    axis.imshow(
        _raw.astype(int) - _cleaned,
        cmap=diff_cmap,
        vmin=-255,
        vmax=255,
        interpolation="none",
    )

    # Use the file stem to drop the extension: str.strip(".tif") would instead
    # strip any of the characters ".", "t", "i", "f" from both ends of the title
    axis.set_title(
        "\n".join(textwrap.wrap(pathlib.Path(path).stem, width=20)).replace("_", " "),
        fontsize=8,
    )

fig.tight_layout()

We might also be interested in seeing which scales were changed by the automatic hole-filling/small-object-removal correction:

In [None]:
fig, axes = plt.subplots(12, 12, figsize=(24, 24))

# Switch the frames off for every panel up front, so that panels left over
# when there are fewer corrected masks than grid cells stay blank
for axis in axes.flat:
    axis.set_axis_off()

# Compare Carran's mask with the automatically post-processed one.
# NOTE: zip stops at the shortest input, so at most 12 * 12 masks are drawn
for axis, path, _carran, _clean in zip(
    axes.flat,
    df["path"][fixed_in_post],
    carran_segs[fixed_in_post],
    clean_segs[fixed_in_post],
):
    axis.imshow(_carran, alpha=0.5)

    # Cast to int first so the subtraction cannot wrap around in uint8
    diff = _carran.astype(int) - _clean

    # Blue: pixels where the difference is -255 (set only in the post-processed mask)
    y, x = np.argwhere(diff == -255).T
    axis.scatter(x, y, s=1, c="blue")

    # Red: pixels where the difference is +255 (set only in Carran's mask)
    y, x = np.argwhere(diff == 255).T
    axis.scatter(x, y, s=1, c="red")

    # Use the file stem to drop the extension: str.strip(".tif") would instead
    # strip any of the characters ".", "t", "i", "f" from both ends of the title
    axis.set_title(
        "\n".join(textwrap.wrap(pathlib.Path(path).stem, width=20)).replace("_", " "),
        fontsize=8,
    )

fig.tight_layout()