## Measuring VNC length

The VNC length is measured using a centerline estimation.
The steps are as follows:

1. The image is binarized, so we can apply a distance transform.
The distance transform is very susceptible to holes and concavities, so we also remove holes from the binary image and apply a binary opening to smooth edges.
2. We apply a 'chessboard' distance transform (the chessboard distance between two points is the maximum absolute difference along any coordinate dimension) to the binary image, which naturally results in peaks over the centerline of the binary image.
Given the embryo morphology, we expect several center points along the anterior-posterior axis and eventually another set of points towards the brain lobes.
3. To separate these points, we fit a RANSAC model that, given the majority of the points are along the AP axis, fits a line along the VNC centerline.
4. From the intersection of this line with the binary image, we are able to measure the VNC length.

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
from pathlib import Path

import matplotlib.pyplot as plt
from tifffile import imread

from pasnascope import centerline, centerline_errors, find_hatching, grid_search, utils, vnc_length

# Name of the experiment; used below as the folder name under `data`.
experiment_name = '20240515'

# Parent of the current working directory.
root_dir = Path.cwd().parent

# <root_dir>/data/<experiment_name>
project_dir = root_dir / 'data' / experiment_name

Visualize centerline method for a single frame. Includes ROI, inliers (green), outliers (red), and centerline.

In [None]:
# Collect the files matching `*ch2.tif` and order them with utils.sort_by_emb_name.
img_dir = project_dir / 'embs'
imgs = list(img_dir.glob('*ch2.tif'))
imgs.sort(key=utils.sort_by_emb_name)

# Pick the entry at index 3 of the sorted list and report which file it is.
img_path = imgs[3]
print(img_path.stem)

# Only the page selected by key=0 is read for the visualization.
img = imread(img_path, key=0)
centerline.view_centerline_dist(img, min_dist=5, thres_rel=0.6)

Compare center line estimation against annotated data.

Though not ideal, sometimes the embryo names of the annotated data don't match the names generated in pasnascope.
In these cases, the name look up table can be used to associate annotated files with pasnascope sliced movies.
The look up table can also be used to ignore embryos if needed.
The look up table is a dictionary, where the keys are the numbers used to identify the embryos in the csv files, and the values are the numbers of the individual movies that were cropped using `pasnascope`.

Use the image generated from `process-raw-data` notebook to inspect the numbers used by `pasnascope`.

In [None]:
def get_length_centerline(file_path, hp, interval=20):
    """Measure the VNC length on a subsampled movie, without masking.

    The pages `0, interval, 2*interval, ...` below `hp` are read from
    `file_path` and handed to `vnc_length.measure_VNC_centerline`, whose
    result is returned unchanged.
    """
    frame_indices = range(0, hp, interval)
    frames = imread(file_path, key=frame_indices)
    return vnc_length.measure_VNC_centerline(frames)

# Every `interval`-th frame of each movie is measured.
interval = 50
img_dir = project_dir.joinpath('embs')
annotated_dir = project_dir.joinpath('annotated')
annotated = sorted(annotated_dir.iterdir(), key=utils.sort_by_emb_name)
# Only the first five annotated files are compared.
annotated = annotated[:5]

# Look up table: number used in the annotated csv files -> number of the
# movie cropped by pasnascope. Leave it empty to use the annotated names as is.
name_LUT = {
    1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10,
    11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18,
    19: 19, 20: 20, 23: 22, 24: 23,
}

if name_LUT:
    emb_names = vnc_length.match_names(annotated, name_LUT)
    embs = [img_dir.joinpath(emb) for emb in emb_names]
else:
    embs = annotated
# NOTE(review): the loop below pairs `embs` and `annotated` by position;
# confirm that `match_names` returns one name per annotated file, in order.

lengths = []
ann_lengths = []

for emb, ann_emb in zip(embs, annotated):
    print(emb.stem)
    img_path = img_dir / f"{emb.stem}.tif"
    hp = find_hatching.find_hatching_point(img_path)
    # Round down to a multiple of `interval`, so the number of sampled frames
    # (range(0, hp, interval)) equals hp // interval, the csv end index below.
    hp -= hp % interval

    vnc_len = get_length_centerline(img_path, hp, interval=interval)
    lengths.append(vnc_len)

    # `ann_emb` was yielded by `annotated_dir.iterdir()`, so it already
    # contains the directory; no need to join it with `annotated_dir` again.
    csv_path = ann_emb
    csv_end = hp // interval
    manual = vnc_length.get_length_from_csv(csv_path, columns=[1], end=csv_end)
    ann_lengths.append(manual)

# squeeze=False always returns a 2D array of Axes, so `ravel` also works when
# there is a single embryo (the default would return a bare Axes object).
fig, ax = plt.subplots(len(embs), squeeze=False)
ax = ax.ravel()
fig.canvas.header_visible = False
fig.canvas.resizable = False
fig.suptitle('Centerline estimation')
for i, (calc_len, ann_len) in enumerate(zip(lengths, ann_lengths)):
    # Frame numbers of the sampled frames: 0, interval, 2*interval, ...
    x = list(range(0, len(calc_len) * interval, interval))
    ax[i].plot(x, calc_len, color='r', label='calculated')
    ax[i].plot(x, ann_len, color='g', label='annotated')
ax[0].legend()

plt.tight_layout()

Center line estimation for a single embryo

In [None]:
def get_length_centerline(file_path, hp, interval=20):
    """Measure the VNC length on a subsampled movie, without masking.

    Reads the pages `range(0, hp, interval)` of `file_path` and returns
    whatever `vnc_length.measure_VNC_centerline` yields for them when called
    with `thres_rel=0.6`.
    """
    sampled_pages = range(0, hp, interval)
    stack = imread(file_path, key=sampled_pages)
    return vnc_length.measure_VNC_centerline(stack, thres_rel=0.6)

# Position (after sorting) of the annotated file to inspect, and the frame
# sampling step.
i = 0
interval = 50

img_dir = project_dir / 'embs'
annotated_dir = project_dir / 'annotated'
embs = list(annotated_dir.iterdir())
embs.sort(key=utils.sort_by_emb_name)
emb = embs[i]
# The movie has the same stem as the annotated file, with a .tif extension.
img = img_dir / f"{emb.stem}.tif"

print(emb.stem)

hp = find_hatching.find_hatching_point(img)
# Round down to a multiple of `interval`.
hp = hp - (hp % interval)
vnc_len = get_length_centerline(img, hp, interval=interval)
# Frame numbers of the sampled frames: 0, interval, 2*interval, ...
x = [k * interval for k in range(len(vnc_len))]

csv_path = annotated_dir / emb
csv_end = hp // interval
manual = vnc_length.get_length_from_csv(csv_path, columns=[1], end=csv_end)

fig, ax = plt.subplots()
fig.suptitle('VNC length estimation')
fig.canvas.header_visible = False
fig.canvas.resizable = False

ax.plot(x, vnc_len, label='calculated')
ax.plot(x, manual, label='annotated')
ax.legend();


Visualize how much variability we have in the centerline estimation, since RANSAC is inherently non-deterministic.

In [None]:
def get_length_centerline(file_path, hp, interval=20):
    """Measure the VNC length on a subsampled movie, without masking.

    Loads pages `0, interval, 2*interval, ...` (all below `hp`) from
    `file_path` and returns the output of
    `vnc_length.measure_VNC_centerline` for that stack.
    """
    pages = range(0, hp, interval)
    movie = imread(file_path, key=pages)
    return vnc_length.measure_VNC_centerline(movie)

interval = 20
img_dir = project_dir.joinpath('embs')
imgs = sorted(img_dir.glob('*ch2.tif'), key=utils.sort_by_emb_name)
# Position of the movie to inspect in the sorted list.
i = 0
num_replicates = 5

img_path = imgs[i]
print(img_path.stem)

# Looked up once, outside the loop: it does not depend on the replicate, and
# every replicate then covers the same frames so the curves share one x axis.
hp = find_hatching.find_hatching_point(img_path)

# Repeat the same measurement; `_` keeps the embryo index `i` untouched.
replicates = [
    get_length_centerline(img_path, hp, interval=interval)
    for _ in range(num_replicates)
]

fig, ax = plt.subplots()
fig.canvas.header_visible = False
fig.canvas.resizable = False
fig.suptitle(f'VNC length estimation for {img_path.stem} (n={num_replicates})')

# Frame numbers of the sampled frames: 0, interval, 2*interval, ...
x = list(range(0, len(replicates[0]) * interval, interval))
for replicate in replicates:
    ax.plot(x, replicate)

plt.tight_layout()

Tests all combinations of the passed parameters to find the best performance for a given experiment.

In [None]:
# File the grid-search output is saved to (when computing) or parsed from.
results_path = project_dir / '../../results' / 'centerline_errors_20240307'
exp_dir = project_dir

# True: run the search and save it; False: parse the output at `results_path`.
should_calculate_grid = False

# Values tried for the `thres_rel` and `min_dist` parameters.
thres_rels = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
min_dists = list(range(1, 14, 2))

if not should_calculate_grid:
    grid_search.parse_grid_search_output(thres_rels, min_dists, results_path)
else:
    annotated_dir = exp_dir / 'annotated'
    emb_files = grid_search.get_emb_files(exp_dir, 2)

    estimator = centerline_errors.evaluate_centerline_estimation
    grid_search.search(
        thres_rels,
        'thres_rel',
        min_dists,
        'min_dist',
        estimator,
        should_save=True,
        file_path=results_path,
        emb_files=emb_files,
        annotated_dir=annotated_dir,
    )


Calculates the average error for all embryos of an experiment that have annotated data.
The error is taken against the annotated data.

In [None]:
emb_dir = project_dir.joinpath('embs')
annotated_dir = project_dir.joinpath('annotated')

# `iterdir` yields entries in arbitrary order; sort them the same way the
# other cells do, so the embryos are passed in a reproducible order.
annotated_files = sorted(annotated_dir.iterdir(), key=utils.sort_by_emb_name)
# One movie per annotated file: same stem, .tif extension, inside `embs`.
emb_files = [emb_dir / f"{f.stem}.tif" for f in annotated_files]
errors = centerline_errors.evaluate_centerline_estimation(emb_files, annotated_dir)

# Materialize the keys so x is a concrete sequence rather than a dict view.
x = list(errors)
# Only the first element of each value is plotted.
y = [e[0] for e in errors.values()]

fig, ax = plt.subplots()
ax.plot(x, y, 'b.')
ax.set_ylim(0, 0.05)
fig.canvas.header_visible = False
fig.suptitle(f"Error (compared to annotated data) for exp {project_dir.stem}")

plt.tight_layout()