Metrics Investigations
====
Investigating metrics (area, Dice score, Hausdorff distance) for describing how well my elliptic Fourier analysis (EFA) captures a shape.

Of course, one thing to consider here is how far the segmentation masks can be trusted as ground truth. If we expect the scale shape to be smooth (or if we want to enforce smoothness because it's a property that we want - for example, if we're only interested in the slower-varying shape differences between scales), we might not want to reconstruct the segmentation mask perfectly: the highest harmonics may just be capturing noise.

First, we'll check if my area formula actually works...

In [None]:
"""
Read in some scales
"""

import pathlib
import tifffile
from tqdm.notebook import tqdm

# Directory holding the cleaned segmentation masks, relative to the working directory
seg_dir = pathlib.Path("segmentation/cleaned_masks")
if not seg_dir.is_dir():
    # Raise explicitly: an `assert` is stripped when Python runs with -O
    raise NotADirectoryError(f"Segmentation mask directory not found: {seg_dir}")

# `sorted` already returns a list, so the glob does not need wrapping in `list()`.
# Sorting keeps the order of the scales (and so the indices used in later cells)
# the same from run to run.
scales = [tifffile.imread(f) for f in tqdm(sorted(seg_dir.glob("*")))]

In [None]:
import numpy as np
from scipy.ndimage import binary_fill_holes
from scale_morphology.scales.segmentation import largest_connected_component

# Tidy up every mask: fill any holes, keep only the largest connected component
# and scale the result by 255 as uint8 (presumably giving a 0/255 binary mask -
# depends on what largest_connected_component returns).
scales = [
    largest_connected_component(binary_fill_holes(mask)).astype(np.uint8) * 255
    for mask in tqdm(scales)
]

In [None]:
"""
Construct EFA descriptions of them
"""
from scale_morphology.scales import efa

# Using 1000 and 500 here gives a pretty good reconstruction of the areas;
# the lower values below make the fit faster.
n_points = 100
n_harmonics = 50

# One set of EFA coefficients per scale, stacked along a new leading axis
coeffs = np.stack(
    [efa.coefficients(scale, n_points, n_harmonics) for scale in tqdm(scales)]
)

In [None]:
import matplotlib.pyplot as plt
from scale_morphology.scales import plotting

# Visual check on a single scale: the mask, its EFA outline and the edge points
i = 100

fig, axis = plt.subplots(figsize=(8, 8))
axis.imshow(scales[i].T, origin="lower", cmap="grey")
axis.set_aspect("equal")

# Mean coordinate of the foreground pixels, handed to plot_efa as the locus
locus = np.asarray(np.nonzero(scales[i] > 0)).mean(axis=1)
plotting.plot_efa(
    locus, coeffs[i], label="Elliptic Expansion best fit", linewidth=3, color="#ff00fa", axis=axis
)

# Edge points sampled with the same n_points used for the coefficients
x, y = efa.points_around_edge(scales[i], n_points)
axis.plot(
    x,
    y,
    "#00ff05",
    linestyle="none",
    marker="o",
    markersize=3,
    label="Edges",
)

axis.set_axis_off()
axis.legend()

In [None]:
from scale_morphology.scales import plotting

# Same overlay as a titled figure: the mask, its EFA outline and the edge points
i = 100
fig, axis = plt.subplots(figsize=(8, 8))
axis.imshow(scales[i].T, origin="lower", cmap="grey")
axis.set_aspect("equal")

# Mean coordinate of the foreground pixels, handed to plot_efa as the locus
locus = np.mean(np.where(scales[i] > 0), axis=1)
plotting.plot_efa(
    locus,
    coeffs[i],
    label="Elliptic Expansion best fit",
    linewidth=3,
    color="#ff00fa",
    axis=axis,
)

# Use n_points (the value the coefficients were computed with). This cell
# previously read `n_pts`, which is only assigned in a later cell, so running
# the notebook from top to bottom raised a NameError here.
x, y = efa.points_around_edge(scales[i], n_points)
axis.plot(x, y, "#00ff05", markersize=3, label="Edges", marker="o", linestyle="none")

axis.set_axis_off()
axis.legend()

fig.suptitle("Scale, edge points and reconstruction")

In [None]:
"""
Find their areas
"""

# np.unique already returns its values in ascending order, so no extra sort is
# needed. The unpacking requires the first mask to hold exactly two distinct
# values (background and foreground) and raises ValueError otherwise.
_, val = np.unique(scales[0])
# Pixel sum divided by the foreground value, i.e. the foreground pixel count.
# Assumes every mask uses the same foreground value as scales[0].
true_areas = [np.sum(s) / val for s in scales]

In [None]:
"""
Calculate the area from these coefficients
"""


def area(coeffs: np.ndarray):
    """
    Area of a shape computed directly from its EFA coefficients

    :param coeffs: array with one row per harmonic and four columns (a, b, c, d)
    :returns: -pi * sum over n of n * (a_n * d_n - b_n * c_n), with harmonics
              numbered from 1. The sign of the result follows the sign of the
              coefficients as given; no absolute value is taken.
    """
    a_n, b_n, c_n, d_n = np.transpose(coeffs)
    harmonic = np.arange(1, len(a_n) + 1)
    cross_terms = a_n * d_n - b_n * c_n
    return -np.pi * np.sum(harmonic * cross_terms)

# EFA-derived area for every scale, in the same order as `coeffs`
approx_areas = [area(scale_coeffs) for scale_coeffs in tqdm(coeffs)]

In [None]:
"""
Plot the area of our scales vs the EFA derived areas - should be a good match
"""

# Scatter of EFA-derived area (x) against pixel-count area (y)
fig, axis = plt.subplots(figsize=(4, 2))
axis.plot(approx_areas, true_areas, ".")
# np.corrcoef returns Pearson's r; square it so the number matches the "r2" label
axis.set_title(f"r2 = {np.corrcoef(approx_areas, true_areas)[0, 1] ** 2:.6f}")

In [None]:
"""
Plot the area as a function of harmonic order
"""

fig, axis = plt.subplots()

n = 250
# Truncation orders 1 .. n - 1. Slicing past the number of harmonics held in
# `coeffs` simply returns all of them, so the curve is flat beyond that order.
orders = np.arange(1, n)

area_diffs = []
errs = []
# Size the bar from what the loops actually iterate over (len(orders) is n - 1)
# so that it can reach 100%, and close it when the loops finish or fail.
with tqdm(total=len(orders) * len(coeffs)) as pbar:
    for o in orders:
        # Absolute area error of every scale using only the first `o` harmonics
        tmp = []
        for coeff_example, true_area in zip(coeffs, true_areas, strict=True):
            tmp.append(abs(area(coeff_example[:o]) - true_area))
            pbar.update(1)
        area_diffs.append(np.median(tmp))
        # Half the interquartile range; drawn either side of the median below
        errs.append(0.5 * np.subtract(*np.quantile(tmp, [0.75, 0.25])))

# Per-scale errors at the highest order tried (used by the histogram cell)
final_diffs = tmp.copy()

axis.plot(orders, area_diffs, label=r"Median $\Delta$Area")
axis.fill_between(
    orders, np.array(area_diffs) + errs, np.array(area_diffs) - errs, alpha=0.3, label="IQR"
)

axis.legend()
axis.set_yscale("log")

axis.set_ylabel("Error /px")
axis.set_xlabel("Harmonic Order")

# Report the number of orders actually plotted (n - 1), not n
fig.suptitle(f"Error in area rapidly approaches 0\n First {len(orders)} shown")
fig.tight_layout()

In [None]:
# Distribution of the per-scale area errors at the highest order tried
plt.hist(final_diffs, bins=30)
# The errors come from the last entry of `orders`, capped at the number of
# harmonics actually stored in `coeffs` - report that rather than n_harmonics,
# which is only correct when the last order is at least as large.
plt.title(f"Final Errors /px after {min(int(orders[-1]), coeffs.shape[1])} harmonics")

Dice Score
----
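We're more interested in whether the shape is faithfully reconstructed than in the area alone. The Dice score measures this as the overlap between the original mask and the mask reconstructed from the EFA coefficients.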

Hausdorff Distance
----
We're actually most interested in how well the boundaries agree - this is measured by the Hausdorff distance, the largest distance from a point on one boundary to the nearest point on the other.

In [None]:
"""
Reconstruct a mask from the coefficients
"""

import cv2


def points2mask(shape, x, y):
    """
    Fill the polygon traced by a set of points to make a segmentation mask

    :param shape: shape of the mask array to create
    :param x: first coordinate of each polygon vertex
    :param y: second coordinate of each polygon vertex
    :returns: array of the requested shape, 255 where cv2.fillPoly filled the
              polygon and 0 elsewhere, rotated by 180 degrees
    """
    # Vertices as int32 pairs in an (n, 1, 2) array, with y first in each pair
    vertices = np.stack([y, x], axis=1).astype(np.int32)
    vertices = vertices.reshape(-1, 1, 2)

    # fillPoly draws into the array it is given; its return value is not used
    canvas = np.zeros(shape)
    cv2.fillPoly(canvas, [vertices], 255)

    # k=2 is two quarter turns
    return np.rot90(canvas, k=2)


# Number of edge points; also read by the overlay cell that follows
n_pts = 1000

fig, axes = plt.subplots(1, 2)

axes[0].imshow(scales[0].T, cmap="binary", origin="lower")
axes[0].set_aspect("equal")

# Mean coordinate of the foreground pixels, handed to plot_efa as the locus
locus = np.asarray(np.nonzero(scales[0] > 0)).mean(axis=1)

# TODO: unfinished - rebuild the mask with points2mask and show it on axes[1],
# which is currently left empty. The coeffs2points call below is incomplete.
# x, y = efa.coeffs2points(coeffs[0], , axis=1))
# reconstructed = points2mask(scales[0].shape, x, y)
# axes[1].imshow(scales[0], origin="lower")
# axes[1].imshow(reconstructed, cmap="binary", alpha=0.5, origin="lower")
plotting.plot_efa(locus, coeffs[0], axis=axes[0])

In [None]:
from scale_morphology.scales import plotting

# Overlay for one scale with the denser n_pts edge sampling, drawn faintly
i = 100

fig, axis = plt.subplots(figsize=(8, 8))
axis.imshow(scales[i].T, origin="lower", cmap="grey")
axis.set_aspect("equal")

# Mean coordinate of the foreground pixels, handed to plot_efa as the locus
locus = np.asarray(np.nonzero(scales[i] > 0)).mean(axis=1)
plotting.plot_efa(
    locus, coeffs[i], label="Elliptic Expansion best fit", linewidth=3, color="#ff00fa", axis=axis
)

x, y = efa.points_around_edge(scales[i], n_pts)
axis.plot(
    x,
    y,
    "#00ff05",
    linestyle="none",
    marker="o",
    markersize=3,
    alpha=0.2,
    label="Edges",
)

axis.set_axis_off()
axis.legend()

fig.suptitle("Scale, edge points and reconstruction")