## Evaluation of the VNC length method

This notebook compares the calculated VNC length values with annotated data, measured manually. It therefore requires annotated data, which should be placed under the directory `annotated`, within the experiment directory. 

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
from pathlib import Path

import matplotlib.pyplot as plt
from tifffile import imread

from pasnascope import centerline_errors, find_hatching, grid_search, utils, vnc_length

# Experiment to evaluate; its data is looked up under `<root_dir>/data/<experiment_name>`.
experiment_name = '20240619'

# NOTE(review): assumes the kernel's working directory sits one level below
# the project root (hence `.parent`) -- confirm if the notebook is moved.
root_dir = Path.cwd().parent
project_dir = root_dir.joinpath('data', experiment_name)
annotated_dir = project_dir.joinpath('annotated')  # manually measured data
img_dir = project_dir.joinpath('embs')  # embryo movies (.tif)

# Every cell below needs the annotated data, so stop here with an explicit
# exception. A bare `assert False` is removed under `python -O` and produces
# an AssertionError with no message.
if not annotated_dir.exists():
    raise FileNotFoundError(
        'Cannot evaluate the measurements for the current experiment.\n'
        'The evaluations require manually measured data.\n'
        f'Expected directory: {annotated_dir}'
    )

Compare center line estimation against annotated data for the first `n` embryos.

Though not ideal, the embryo names in the annotated data sometimes don't match the names generated by pasnascope.
In these cases, the name lookup table can be used to associate annotated files with pasnascope sliced movies.
The lookup table can also be used to ignore embryos if needed.
The lookup table is a dictionary whose keys are the numbers that identify the embryos in the annotated data and whose values are the numbers of the individual movies that were cropped using `pasnascope`.

Use the image generated by the `process-raw-data` notebook to inspect the numbers used by `pasnascope`.

In [None]:
interval = 50  # step, in frames, used to subsample each movie
n = 3  # number of annotated embryos to compare

# Name lookup table for cases where the annotated embs and the pasnascope embs
# have different numbering (annotated number -> pasnascope number).
# Can be set to an empty dict if all emb names match.
name_LUT = {1:1,2:2, 3:4, 4:5, 5:6, 6:7, 7:8, 8:9, 9:10, 10:11, 11:12, 12:13, 13:14, 14:15, 15:16, 16:17, 17:18, 18:19, 19:21, 20:22, 21: 23, 22: 24, 23: 31, 24: 33, 25:25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 32}

annotated = sorted(annotated_dir.glob('*ch2.csv'), key=utils.emb_number)
annotated = annotated[:n]

if name_LUT:
    emb_names = centerline_errors.match_names(annotated, name_LUT)
    embs = [img_dir.joinpath(emb) for emb in emb_names]
else:
    embs = annotated

lengths = []
ann_lengths = []

for emb, ann_emb in zip(embs, annotated):
    print(emb.stem)
    # Always derive the movie path from the stem: in the fallback branch above
    # `emb` is the annotated CSV path, which must not be handed to `imread`.
    img_path = img_dir / f"{emb.stem}.tif"
    hp = find_hatching.find_hatching_point(img_path)
    # Round down to a multiple of `interval` so the number of sampled frames
    # equals `hp // interval`, the number of CSV rows read below.
    hp -= hp % interval

    img = imread(img_path, key=range(0, hp, interval))
    vnc_len = vnc_length.measure_VNC_centerline(img)
    lengths.append(vnc_len)

    # `ann_emb` comes from `annotated_dir.glob(...)` and already includes the
    # directory, so it is used directly instead of being joined again.
    csv_path = ann_emb
    csv_end = hp // interval
    manual = vnc_length.get_length_from_csv(csv_path, columns=[1], end=csv_end)
    ann_lengths.append(manual)

# `squeeze=False` always returns a 2-D array of Axes, so `.ravel()` also works
# when a single embryo is plotted (n == 1).
fig, ax = plt.subplots(len(embs), squeeze=False)
ax = ax.ravel()
fig.canvas.header_visible = False
fig.canvas.resizable = False
fig.suptitle('Centerline estimation')
for i, (calc_len, ann_len) in enumerate(zip(lengths, ann_lengths)):
    x = list(range(0, len(calc_len)*interval, interval))
    ax[i].plot(x, calc_len, color='r', label='calculated')
    ax[i].plot(x, ann_len, color='g', label='annotated')
ax[0].legend()

plt.tight_layout()

Center line estimation for a single embryo

In [None]:
# Position of the embryo within the sorted annotated files, and the frame step.
i = 16
interval = 50

embs = sorted(annotated_dir.glob('*ch2.csv'), key=utils.emb_number)
emb = embs[i]
movie_path = img_dir.joinpath(f"{emb.stem}.tif")

# Round the hatching point down to a multiple of `interval`; `hp // interval`
# is then the number of frames read and the number of CSV rows requested.
hp = find_hatching.find_hatching_point(movie_path)
hp = hp - hp % interval
img = imread(movie_path, key=range(0, hp, interval))

# Calculated lengths, with one x position (frame index) per measurement.
vnc_len = vnc_length.measure_VNC_centerline(img)
x = [step * interval for step in range(len(vnc_len))]

# Manually annotated lengths for the same span.
csv_end = hp // interval
csv_path = annotated_dir.joinpath(emb)
manual = vnc_length.get_length_from_csv(csv_path, columns=(1,), end=csv_end)

fig, ax = plt.subplots()
fig.suptitle(f'VNC length estimation - {emb.stem}')
fig.canvas.header_visible = False
fig.canvas.resizable = False

for series, series_label in ((vnc_len, 'calculated'), (manual, 'annotated')):
    ax.plot(x, series, label=series_label)
ax.legend();

Visualize how much variability we have in the centerline estimation, since RANSAC is inherently non-deterministic.

In [None]:
i = 0  # position of the movie within the sorted `*ch2.tif` files
num_replicates = 5  # how many times the same frames are measured
interval = 20  # step, in frames, used to subsample the movie

imgs = sorted(img_dir.glob('*ch2.tif'), key=utils.emb_number)
img_path = imgs[i]
print(img_path.stem)

hp = find_hatching.find_hatching_point(img_path)
img = imread(img_path, key=range(0, hp, interval))

# Measure the very same frames several times; any spread between the curves
# comes from the estimator itself. The throwaway loop variable keeps the
# embryo selector `i` above from being overwritten.
replicates = [
    vnc_length.measure_VNC_centerline(img) for _ in range(num_replicates)
]

fig, ax = plt.subplots()
fig.canvas.header_visible = False
fig.canvas.resizable = False
fig.suptitle(f'VNC length estimation for {img_path.stem} (n={num_replicates})')

x = list(range(0, len(replicates[0])*interval, interval))
for rep_number, rep_lengths in enumerate(replicates, start=1):
    ax.plot(x, rep_lengths, label=f'replicate {rep_number}')
ax.set_xlabel('frame')
ax.set_ylabel('VNC length')
ax.legend()

plt.tight_layout()

Tests all combinations of the passed parameters to find the best performance for a given experiment.

In [None]:
# --- Settings -------------------------------------------------------------
interval = 50
should_calculate_grid = True  # False: only parse previously saved results

# Values tried for each parameter; every combination is evaluated.
thres_rels = [0, 0.1, 0.2, 0.3, 0.5]
min_dists = [1, 3, 5, 7, 13]

exp_dir = project_dir
results_path = project_dir.parents[1].joinpath(
    'results', experiment_name, 'centerline_errors'
)

# Annotated embryo number -> pasnascope embryo number.
name_LUT = {
    1: 1, 2: 2, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11,
    11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19,
    19: 21, 20: 22, 21: 23, 22: 24, 23: 31, 24: 33, 25: 25, 26: 26,
    27: 27, 28: 28, 29: 29, 30: 30, 31: 32,
}

# --- Input files ----------------------------------------------------------
embs, annotated = centerline_errors.load_files(img_dir, annotated_dir)

# NOTE(review): `embs` is rebuilt here from the lookup table but is not passed
# to the grid search below, which receives `emb_files` instead -- confirm that
# this is intended.
if not name_LUT:
    embs = annotated
else:
    emb_names = centerline_errors.match_names(annotated, name_LUT)
    embs = [img_dir.joinpath(emb) for emb in emb_names]

# --- Grid search ----------------------------------------------------------
if not should_calculate_grid:
    grid_search.parse_grid_search_output(thres_rels, min_dists, results_path)
else:
    emb_files = grid_search.get_emb_files(exp_dir, 2)
    print(emb_files)
    estimator = centerline_errors.evaluate_CLE_global
    grid_search.search(
        thres_rels, 'thres_rel',
        min_dists, 'min_dist',
        estimator,
        should_save=True,
        file_path=results_path,
        interval=interval,
        emb_files=emb_files,
        annotated=annotated,
    )


Calculates the average error for all embryos of an experiment that have annotated data.
The error is defined as the average of the absolute relative error.

In [None]:
interval = 50

# Annotated embryo number -> pasnascope embryo number.
name_LUT = {
    1: 1, 2: 2, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11,
    11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19,
    19: 21, 20: 22, 21: 23, 22: 24, 23: 31, 24: 33, 25: 25, 26: 26,
    27: 27, 28: 28, 29: 29, 30: 30, 31: 32,
}

embs, annotated = centerline_errors.load_files(img_dir, annotated_dir)

# Use the lookup table, when one is given, to pick the movie for each
# annotated file; otherwise fall back to the annotated list itself.
if not name_LUT:
    embs = annotated
else:
    emb_names = centerline_errors.match_names(annotated, name_LUT)
    embs = [img_dir.joinpath(emb) for emb in emb_names]

errors = centerline_errors.evaluate_CLE_global(
    embs, annotated_dir, interval=interval
)

# One point per entry in `errors`; the first element of each value is plotted.
x = range(len(errors))
y = [entry[0] for entry in errors.values()]
embs = errors.keys()

fig, ax = plt.subplots()
fig.canvas.header_visible = False
fig.suptitle(f"Error (compared to annotated data) for exp {project_dir.stem}")
ax.plot(x, y, 'b.')
ax.set_ylabel('Abs relative error')
ax.set_xticks([])

plt.tight_layout()

In [None]:
# Clear any previously stored `errors` so that the next cell recomputes the
# comparison metrics (it only calls `get_comparison_metrics` when
# `errors is None`). Skip re-running this cell to reuse the cached result.
errors = None

In [None]:
interval = 50
# Set to True to scale every series by its own maximum before plotting.
# The default keeps the raw values on both axes.
normalize = False
name_LUT = {1:1,2:2, 3:4, 4:5, 5:6, 6:7, 7:8, 8:9, 9:10, 10:11, 11:12, 12:13, 13:14, 14:15, 15:16, 16:17, 17:18, 18:19, 19:21, 20:22, 21: 23, 22: 24, 23: 31, 24: 33, 25:25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 32}

embs, annotated = centerline_errors.load_files(img_dir, annotated_dir)

if name_LUT:
    emb_names = centerline_errors.match_names(annotated, name_LUT)
    embs = [img_dir.joinpath(emb) for emb in emb_names]
else:
    embs = annotated

# The metrics are only computed when `errors` has been reset to None, so a
# previous result can be reused while tweaking the plot.
# NOTE(review): `errors` is also assigned by the average-error cell with a
# different structure; reset it to None before running this cell.
if errors is None:
    errors = centerline_errors.get_comparison_metrics(embs, annotated, interval=interval, thres_rel=0.3, min_dist=5)

calc, annot = errors

fig, ax = plt.subplots(figsize=(8,6))

for k in calc.keys():
    # Compare only the overlapping part of the two series.
    min_len = min(len(calc[k]), len(annot[k]))
    if min_len == 0:
        # Nothing to compare for this embryo.
        continue
    calc_vals = calc[k][:min_len]
    ann_vals = annot[k][:min_len]
    if normalize:
        # Each series is divided by its OWN maximum; the annotated values were
        # previously divided by the calculated maximum by mistake.
        max_calc = max(calc_vals)
        max_ann = max(ann_vals)
        calc_vals = [c/max_calc for c in calc_vals]
        ann_vals = [a/max_ann for a in ann_vals]
    ax.scatter(calc_vals, ann_vals, s=10, marker='o', alpha=0.5, label=k)

axis_suffix = ' (normalized)' if normalize else ''
ax.set_xlabel(f'calculated{axis_suffix}')
ax.set_ylabel(f'annotated{axis_suffix}')
ax.legend(bbox_to_anchor=(1.1, 1.1))
plt.tight_layout()