Scale Analysis Example
====
Using scales from Rabia Sevil

We'll read the LIF files, segment the scales (either by simple thresholding or with a pretrained model), and then run Elliptical Fourier Analysis (EFA) to summarise their shape variation.

Read in the scales
----

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Set to False to skip drawing the grid of raw scale images in a later cell.
plot = True

In [None]:
import pathlib

# Folder whose entries (apart from the ones excluded below) are treated as
# directories of LIF files.
parent_dir = pathlib.Path("~/zebrafish_rdsf/Rabia/SOST scales").expanduser()
if not parent_dir.exists():
    raise FileNotFoundError(f"Scale directory not found: {parent_dir}")

# glob() yields entries in arbitrary filesystem order; sort them so that
# indexing into scale_dirs selects the same directory on every run.
scale_dirs = tuple(
    sorted(d for d in parent_dir.glob("*") if d.stem not in {".DS_Store", "TIFs"})
)

In [None]:
from scale_morphology.scales import read

# Work on a single directory for now: the second entry of scale_dirs.
scale_dir = scale_dirs[1]
scale_paths = scale_dir.glob("*.lif")
# Only the first LIF file yielded by the glob is read; next() raises
# StopIteration if the directory contains no .lif files.
path = next(scale_paths)
print(path)

# NOTE(review): read_lif presumably yields (name, image) pairs, which
# zip(*...) splits into a tuple of names and a tuple of images - confirm.
names, images = zip(*read.read_lif(path))

In [None]:
import math
import textwrap
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt


def factor_int(n):
    """
    Split a positive integer into two integer factors that are as close to
    each other as possible.

    The search starts at ``ceil(sqrt(n))`` and walks downwards until an exact
    divisor is found, so a prime ``n`` gives ``(1, n)``.

    :param n: positive integer to factorise (e.g. a number of subplots).
    :returns: tuple ``(val, val2)`` with ``val * val2 == n``.
    :raises ValueError: if ``n`` is less than 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n}")

    val = math.ceil(math.sqrt(n))
    # Integer modulo instead of float division: exact for any size of n.
    while n % val:
        val -= 1
    return val, n // val


def plot_imgs(images, **plot_kw):
    """
    Show ``images`` on a grid of subplots, one image per axis.

    Titles are taken, in order, from the module-level ``titles`` list. The
    grid dimensions come from ``factor_int(len(images))``.

    :param images: sequence of images accepted by ``Axes.imshow``.
    :param plot_kw: extra keyword arguments forwarded to ``Axes.imshow``.
    """
    global titles  # only read here; declared to flag the dependency on notebook state

    n_rows, n_cols = factor_int(len(images))

    # figsize is (width, height), so the column count sets the width.
    # squeeze=False keeps `axes` a 2D array even for a 1x1 grid, so that
    # `axes.flat` also works when there is a single image.
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(3 * n_cols, 3 * n_rows), squeeze=False
    )
    for axis, img, title in zip(tqdm(axes.flat), images, titles):
        axis.imshow(img, **plot_kw)
        axis.set_title(title)
        axis.set_axis_off()
    fig.tight_layout()


# Wrap each image name onto lines of at most 10 characters to use as subplot titles.
titles = ["\n".join(textwrap.wrap(name, width=10)) for name in names]
if plot:
    plot_imgs(images)

Segment them
----
Now that we have read the scale images into memory, we want to segment them (e.g. by thresholding) so that each scale is separated from its background.
There aren't that many, so we could probably just do this by hand, but I don't have a mouse right now so I'm going to try to do it using computers.

In [None]:
"""
So I can continue with the analysis while some stuff downloads, let's pretend that these are the finished masks...
"""
import pathlib
import tifffile
# Directory of pre-made segmentation masks, relative to the working directory.
mask_dir = pathlib.Path("segmentation_stuff/masks")

# Load every TIFF mask in the directory, in whatever order the glob yields them.
# NOTE: a later cell globs this directory again to recover the file names, so it
# relies on both globs returning the files in the same order.
masks = [tifffile.imread(path) for path in tqdm(list(mask_dir.glob("*.tif")))]

Elliptical Fourier Analysis
----
We'll summarise their shapes using Elliptical Fourier Analysis (EFA)
<a name="cite_ref-1"></a><sup>[1]</sup>
<a name="cite_ref-2"></a><sup>[2]</sup>,
which basically decomposes the boundary into sums of ellipses.
The coefficients (strength and direction of each size of ellipse) tell us about the shape of the object.
There's a demonstration of how this works [here](https://reinvantveer.github.io/2019/07/12/elliptical_fourier_analysis.html).

Our edge is constructed as:

\begin{aligned}
x(t) &= a_0 + \sum_{n=1}^{N} \big[a_n \cos(n t) + b_n \sin(n t)\big],\\
y(t) &= c_0 + \sum_{n=1}^{N} \big[c_n \cos(n t) + d_n \sin(n t)\big],
\qquad t \in [0, 2\pi].
\end{aligned}

with:

\begin{aligned}
a_0 = \frac{1}{2\pi}\int_{0}^{2\pi} x(t)\,dt,\qquad
c_0 = \frac{1}{2\pi}\int_{0}^{2\pi} y(t)\,dt.
\end{aligned}

\begin{aligned}
a_n &= \frac{1}{\pi}\int_{0}^{2\pi} x(t)\cos(n t)\,dt, &
b_n &= \frac{1}{\pi}\int_{0}^{2\pi} x(t)\sin(n t)\,dt,\\
c_n &= \frac{1}{\pi}\int_{0}^{2\pi} y(t)\cos(n t)\,dt, &
d_n &= \frac{1}{\pi}\int_{0}^{2\pi} y(t)\sin(n t)\,dt.
\end{aligned}

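The implementation may normalise these coefficients differently, so the computed values can differ from the expressions above by constant factors (e.g. of $2\pi$).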

In [None]:
%load_ext autoreload
%autoreload 2
"""
Perform EFA on the scales and plot the reconstruction
"""
import importlib

import numpy as np
from tqdm.notebook import tqdm
from scipy.ndimage import binary_fill_holes
from scale_morphology.scales import efa, errors, segmentation

# Both values are passed straight to efa.coefficients. `order` is also the
# number of rows of the (order, 4) placeholder used for failed scales.
# NOTE(review): n_edge_points is presumably the number of boundary samples - confirm.
n_edge_points = 200
order = 50

# One coefficient array per mask, in the same order as `masks`.
coeffs = []
for scale in tqdm(masks):
    # Hack - lots of them are broken
    # This doesn't even work as a hack. oh well
    # NOTE(review): presumably keeps only the largest connected component - confirm.
    scale = segmentation._largest_connected_component(scale)
    # Fill interior holes, using a full 3x3 structuring element.
    scale = binary_fill_holes(scale, structure=np.ones((3, 3)))
    # Convert the boolean result to a 0/255 uint8 image.
    scale = (scale * 255).astype(np.uint8)
    try:
        coeffs.append(efa.coefficients(scale, n_edge_points, order))
    except errors.BadImgError as e:
        # Keep a NaN placeholder so that coeffs stays aligned with masks.
        coeffs.append(np.ones((order, 4)) * np.nan)
        print(f"\nError processing scale: {e}. NaN coeffs")
# Stack the per-scale arrays along a new leading axis.
coeffs = np.stack(coeffs)

In [None]:
# Boolean selector: True for scales whose coefficients contain no NaN,
# i.e. those that were not replaced by the NaN placeholder.
good_indices = ~np.isnan(coeffs).any(axis=(1, 2))

# File names of the masks that were processed successfully.
# NOTE: this globs mask_dir again and assumes the files come back in the same
# order as when `masks` was loaded.
good_mask_names = np.array([path.name for path in tqdm(list(mask_dir.glob("*.tif")))])[
    good_indices
]
good_coeffs = coeffs[good_indices]

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# NOTE(review): this rebinds `images`, which earlier held the images read from
# the LIF file; np.array(masks) also requires all masks to share one shape.
images = np.array(masks)[good_indices]
# One row per scale: flatten each coefficient array into a single feature vector.
flat_coeffs = good_coeffs.reshape((good_coeffs.shape[0], -1))

# Project the flattened coefficients onto the first two principal components.
pca = PCA(n_components=2)
transformed_coeffs = np.ascontiguousarray(pca.fit_transform(flat_coeffs))

# Re-read from disk, by file name, the masks that were processed successfully.
good_imgs = [
    tifffile.imread(f"segmentation_stuff/masks/{pathlib.Path(p).name}")
    for p in tqdm(good_mask_names)
]

In [None]:
from scale_morphology.scales import dashboard

# Binarise before scaling to 0/255. Other cells treat the masks as 0/255 images
# (they compare against 255 and divide sums by 255); multiplying a uint8 value
# of 255 by 255 wraps around to 1, which would make the embedded images
# almost black. `i > 0` gives the same result for 0/1 and 0/255 masks.
embeddable_imgs = [
    dashboard.embeddable_image((i > 0).astype(np.uint8) * 255) for i in tqdm(good_imgs)
]


# Filled in below with one colour category per mask.
colours = []


def _colour(name):
    """
    Map a mask file name to an integer category, case-insensitively.

    The category depends on whether the name contains "hom" and/or
    "ontogenetic": both -> 0, only "hom" -> 1, only "ontogenetic" -> 2,
    neither -> 3.
    """
    lowered = name.lower()
    has_hom = "hom" in lowered
    has_ontogenetic = "ontogenetic" in lowered

    categories = {
        (True, True): 0,
        (True, False): 1,
        (False, True): 2,
        (False, False): 3,
    }
    return categories[has_hom, has_ontogenetic]


# Colour category for every successfully processed mask, stored as a string.
for name in good_mask_names:
    colours.append(str(_colour(name)))

# One row per scale: 2D PCA coordinates plus the embedded image, colour
# category and file name.
dashboard_df = pd.DataFrame(transformed_coeffs, columns=["x", "y"])
dashboard_df["image"] = embeddable_imgs
dashboard_df["colour"] = colours
dashboard_df["name"] = good_mask_names

# Display the table as the cell output.
dashboard_df

In [None]:
from bokeh.plotting import figure, save
from bokeh.models import ColumnDataSource, HoverTool, CategoricalColorMapper
from bokeh.resources import INLINE
from scipy.spatial import ConvexHull

# Map each distinct colour category (sorted by np.unique) to a Category10 colour.
factors = np.unique(dashboard_df["colour"])
mapper = CategoricalColorMapper(factors=factors, palette=f"Category10_4")

datasource = ColumnDataSource(dashboard_df)
fig = figure(
    title="Test", width=800, height=800, tools="pan, wheel_zoom, box_zoom, reset"
)

# Hover tooltip showing the "image" and "name" columns of the hovered point.
fig.add_tools(
    (
        HoverTool(
            tooltips="""
<div>
    <div>
        <img src="@image" style="float: left; margin: 5px 5px 5px 5px;">
    </div>
    <div>
        <span style="font-size: 17px; font-weight: bold;">@name</span>
    </div>
</div>
"""
        )
    )
)


# One point per scale at its PCA coordinates, coloured by category.
fig.scatter(
    x="x",
    y="y",
    source=datasource,
    size=4,
    color={"field": "colour", "transform": mapper},
    legend_field="colour",
)

# Outline each colour category with the convex hull of its points.
# np.unique returns the categories in the same sorted order as the mapper's
# factors, so index i selects the matching palette colour.
for i, colour_value in enumerate(np.unique(dashboard_df["colour"])):
    group_points = dashboard_df[dashboard_df["colour"] == colour_value][
        ["x", "y"]
    ].values

    # Groups with fewer than three points are skipped.
    if len(group_points) >= 3:
        hull = ConvexHull(group_points)

        vertices = group_points[hull.vertices]

        # Close the polygon by adding the first point at the end
        vertices = np.vstack([vertices, vertices[0]])

        hull_color = mapper.palette[i % len(mapper.palette)]
        fig.patch(
            x=vertices[:, 0],
            y=vertices[:, 1],
            alpha=0.2,
            line_color=hull_color,
            line_width=2,
            fill_color=hull_color,
        )

# Write the figure to an HTML file in the working directory, using inline
# Bokeh resources; the page title is the file name without ".html".
filename = "test_dashboard.html"
save(
    fig,
    filename=filename,
    title=pathlib.Path(filename.replace(".html", "")).name,
    resources=INLINE,
)

In [None]:
from scale_morphology.scripts.plotting import interpret_dimensions

# NOTE(review): the second argument is an all-False boolean array with one entry
# per scale; what this private helper uses it for is not visible here - confirm
# against interpret_dimensions.
interpret_dimensions._plot_pca_importance(
    good_coeffs, np.zeros(good_coeffs.shape[0], dtype=bool)
)

In [None]:
# Sum of each good mask divided by 255: a foreground pixel count, assuming the
# masks are stored as 0/255 images (TODO confirm).
sizes = [np.sum(m) / 255 for m in tqdm(np.array(masks)[good_indices])]

In [None]:
# Scatter scale size against the first principal component.
plt.plot(sizes, transformed_coeffs[:, 0], ".")
plt.xlabel("Scale Size (pixels?)")
plt.ylabel("PC1")
plt.title("The first principal component tells us about size")
plt.tight_layout()

# Save the figure into ./rabia, creating the directory if needed.
plot_dir = pathlib.Path("rabia")
plot_dir.mkdir(exist_ok=True)
plt.savefig(plot_dir / "sizes.png")

In [None]:
from scipy.spatial import ConvexHull
from skimage.draw import polygon


def convex_hull_mask(mask_bool):
    """
    Rasterize the convex hull of a mask's foreground pixels.

    Returns a tuple ``(hull_mask, hull_area)``. With fewer than three
    foreground pixels no hull is built: a copy of the input is returned
    together with an area of ``0.0``.
    """
    rows, cols = np.nonzero(mask_bool)
    if rows.size < 3:
        return mask_bool.copy(), 0.0

    # Hull points are stored as (x, y), i.e. (column, row).
    points_xy = np.column_stack((cols, rows))
    hull = ConvexHull(points_xy)
    corner_x, corner_y = points_xy[hull.vertices].T

    # skimage's polygon() takes row coordinates first, then column coordinates.
    fill_rows, fill_cols = polygon(corner_y, corner_x, shape=mask_bool.shape)
    filled = np.zeros_like(mask_bool, dtype=bool)
    filled[fill_rows, fill_cols] = True

    # For 2D input, ConvexHull.volume is the enclosed area.
    return filled, hull.volume


def concavity_metrics(mask_bool):
    """
    Return the area-based concavity of a binary mask, ``1 - solidity``.

    Solidity is the number of foreground pixels divided by the convex hull
    area reported by ``convex_hull_mask``. The result is always a float, so a
    list of results can be plotted directly.

    :param mask_bool: 2D boolean array, True for foreground pixels.
    :returns: the concavity, or ``nan`` if the mask has no foreground or its
        hull has zero area.
    """
    area = float(mask_bool.sum())
    if area == 0:
        return np.nan

    _, hull_area = convex_hull_mask(mask_bool)
    if hull_area == 0:
        return np.nan

    # NOTE: the hull area is measured between pixel coordinates whereas `area`
    # counts whole pixels, so very small masks can have a solidity above 1.
    solidity = area / hull_area
    return 1.0 - solidity


# Concavity of every mask that passed EFA; pixels equal to 255 count as foreground.
concavity = [concavity_metrics(m == 255) for m in tqdm(np.array(masks)[good_indices])]

In [None]:
# Scatter concavity against the second principal component.
plt.plot(concavity, transformed_coeffs[:, 1], ".")
plt.tight_layout()

[^1](#cite_ref-1):  F. P. Kuhl and C. R. Giardina, ‘Elliptic Fourier features of a closed contour’, Computer Graphics and Image Processing, vol. 18, no. 3, pp. 236–258, Mar. 1982, doi: 10.1016/0146-664x(82)90034-x. 

[^2](#cite_ref-2): N. MacLeod, ‘PalaeoMath 101 part 25: the centre cannot hold II: elliptic Fourier analysis’, Palaeontol. Assoc. Newslett., vol. 79, pp. 29–43, 2012. http://go.palass.org/65a