## Process Probed Point Distances

In this notebook we ingest, examine, and join data for evaluation of SmartSPIM to CCF registration.

## Inputs

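1. Per-label lists of surface distances between registered and baseline label annotation surfaces (`*_labels_<label>_distances.csv` files inside `INPUT_DISTANCES_PATH`)
2. Dice overlap agreement results comparing registered and baseline label image surfaces (the `*_overlap.csv` file in the parent directory of `INPUT_DISTANCES_PATH`)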

## See Also

Batch processing CLI: `../src/aind_ccf_alignment_experiments/postprocess_cli.py`

In [1]:
# Directory that is searched below for the per-label "*_distances.csv" files;
# its parent directory is searched for the matching "*_overlap.csv" file.
INPUT_DISTANCES_PATH = (
    r"D:\repos\allen-registration\notebooks\data\results\652506\LEVEL_3\2023.06.05\Ex_561_Em_593\labels\split"
)

In [2]:
import glob
import os
import re
import sys

import numpy as np
import pandas as pd
import itk

# NOTE(review): presumably turns on ITK's progress reporting for this session —
# confirm the meaning of the argument against the ITK Python documentation.
itk.auto_progress(1)

import sys

# Add "../src" (resolved against the current working directory) to the module
# search path so the project import that follows can be found there.
sys.path.append("../src")
from aind_ccf_alignment_experiments.url import parse_sample_filepath

In [3]:
# parse_sample_filepath is a project helper; the four values it yields for the
# input directory are unpacked here and reused by every later cell.
sample_metadata = parse_sample_filepath(INPUT_DISTANCES_PATH)
subject_id, channel_id, registration_date, experiment_name = sample_metadata

parse_summary = f"Parsing results for subject {subject_id}, experiment {experiment_name}, channel {channel_id}"
print(parse_summary)

Parsing results for subject 652506, experiment LEVEL_3, channel Ex_561_Em_593


## Load and Examine Distances

In [4]:
# Collect the per-label distance CSVs for this channel.
# - glob.escape() keeps any glob metacharacters in the directory name literal.
# - sorted() makes the processing order (and therefore the row order of the
#   tables built from it) independent of filesystem enumeration order.
distance_filepaths = sorted(
    glob.glob(
        os.path.join(
            glob.escape(INPUT_DISTANCES_PATH),
            f"*{channel_id}_labels_*_distances.csv",
        )
    )
)
print(f"Found {len(distance_filepaths)} files with point boundary distances")

# Fail early, with a message naming what was searched for, if nothing matched.
assert len(distance_filepaths) > 0, (
    f"No '*{channel_id}_labels_*_distances.csv' files found in "
    f"{INPUT_DISTANCES_PATH}"
)

Found 4 files with point boundary distances


In [5]:
# Map each label value (parsed from the file name) to its array of distances.
distances = {}

# Raw string with an escaped "." and at least one digit required, so a file
# without a numeric label is reported explicitly instead of failing in int("").
label_filename_pattern = re.compile(r"_labels_([0-9]+)_distances\.csv$")

for distance_filepath in distance_filepaths:
    label_match = label_filename_pattern.search(
        os.path.basename(distance_filepath)
    )
    if label_match is None:
        raise ValueError(
            f"Cannot parse a numeric label value from {distance_filepath}"
        )
    label_value = int(label_match.group(1))
    # ndmin=1 keeps a file holding a single value as a length-1 array; without
    # it np.loadtxt returns a 0-d array and len() below raises TypeError.
    distances[label_value] = np.loadtxt(distance_filepath, ndmin=1)
    print(
        f"{len(distances[label_value])} point distances for label {label_value}"
    )

42609 point distances for label 473
17342 point distances for label 585
19814 point distances for label 788
35861 point distances for label 890


In [6]:
# Create dummy entry for overview of all label results
combined_distances = np.array([])

for label_value in distances:
    combined_distances = np.concatenate(
        (combined_distances, distances[label_value]), axis=0
    )

distances[-1] = combined_distances
print(f"{distances[-1].shape[0]} point distances for label -1 (all)")

115626 point distances for label -1 (all)


In [7]:
def make_distance_entry(
    subject_id,
    channel_name,
    experiment_id,
    registration_date,
    label_value,
    headers,
    distances,
) -> pd.DataFrame:
    """Summarize one collection of distances as a single-row DataFrame.

    The row holds, in this order: the five identifying values passed in
    (subject, channel, experiment, registration date, label), the number of
    distances, and their mean, standard deviation, minimum, median and maximum.

    Parameters
    ----------
    subject_id, channel_name, experiment_id, registration_date, label_value
        Identifying values copied unchanged into the first five columns.
    headers
        Column names for the resulting frame; must have 11 entries matching
        the order described above.
    distances
        Sized collection of numeric distances (e.g. a 1-D NumPy array).

    Returns
    -------
    pd.DataFrame
        One row with ``headers`` as columns. When ``distances`` is empty the
        count is 0 and the five statistics are NaN.
    """
    values = np.asarray(distances)

    if values.size == 0:
        # np.min / np.max raise on empty input; report undefined statistics as
        # NaN so a label without points does not abort the whole summary.
        statistics = [np.nan] * 5
    else:
        statistics = [
            np.mean(values),
            np.std(values),
            np.min(values),
            np.median(values),
            np.max(values),
        ]

    row = [
        subject_id,
        channel_name,
        experiment_id,
        registration_date,
        label_value,
        len(distances),
        *statistics,
    ]
    return pd.DataFrame([row], columns=headers)

In [8]:
# Signed distances
distance_headers = [
    "subject_id",
    "channel_id",
    "experiment_id",
    "registration_date",
    "label_value",
    "count",
    "mean",
    "std",
    "min",
    "median",
    "max",
]
signed_distance_descriptors = pd.DataFrame([], columns=distance_headers)

for label_value in distances:
    signed_distance_descriptors = pd.concat(
        [
            signed_distance_descriptors,
            make_distance_entry(
                subject_id,
                channel_id,
                experiment_name,
                registration_date,
                label_value,
                distance_headers,
                distances[label_value],
            ),
        ],
        ignore_index=True,
    )

signed_distance_descriptors.head()

Unnamed: 0,subject_id,channel_id,experiment_id,registration_date,label_value,count,mean,std,min,median,max
0,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,473,42609,0.000575,0.040988,-0.14319,8.483461e-44,0.149448
1,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,585,17342,0.020314,0.060305,-0.096413,0.0226709,0.142637
2,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,788,19814,0.057695,0.052597,-0.067114,0.06510509,0.193371
3,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,890,35861,-0.002529,0.051377,-0.127839,1.469402e-43,0.109212
4,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,-1,115626,0.012361,0.054212,-0.14319,0.0092644,0.193371


In [9]:
# Absolute (unsigned) distances: same summary as above, computed on |distance|.
abs_distance_headers = [
    "subject_id",
    "channel_id",
    "experiment_id",
    "registration_date",
    "label_value",
    "count",
    "abs_mean",
    "abs_std",
    "abs_min",
    "abs_median",
    "abs_max",
]

# Build one summary row per label and concatenate them in a single call.
# Growing a frame with pd.concat inside the loop copies it on every iteration,
# and starting from an empty frame relies on concat behaviour for empty entries
# that pandas has deprecated.
abs_distance_entries = [
    make_distance_entry(
        subject_id,
        channel_id,
        experiment_name,
        registration_date,
        label_value,
        abs_distance_headers,
        np.abs(label_distances),
    )
    for label_value, label_distances in distances.items()
]

# pd.concat rejects an empty list, so fall back to an empty, correctly
# labelled frame when there is nothing to summarize.
abs_distance_descriptors = (
    pd.concat(abs_distance_entries, ignore_index=True)
    if abs_distance_entries
    else pd.DataFrame([], columns=abs_distance_headers)
)

abs_distance_descriptors.head()

Unnamed: 0,subject_id,channel_id,experiment_id,registration_date,label_value,count,abs_mean,abs_std,abs_min,abs_median,abs_max
0,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,473,42609,0.030192,0.027727,4.897538e-44,0.02276,0.149448
1,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,585,17342,0.054229,0.033296,4.897538e-44,0.053708,0.142637
2,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,788,19814,0.065939,0.0418,4.897538e-44,0.066069,0.193371
3,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,890,35861,0.043761,0.027037,4.897538e-44,0.043398,0.127839
4,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,-1,115626,0.044131,0.033825,4.897538e-44,0.037158,0.193371


In [10]:
# Put the signed and absolute statistics of each label on one row by joining
# the two summaries on the identifying columns they share.
distance_descriptors = signed_distance_descriptors.merge(
    abs_distance_descriptors,
    on=[
        "subject_id",
        "channel_id",
        "experiment_id",
        "registration_date",
        "label_value",
        "count",
    ],
)
distance_descriptors.head()

Unnamed: 0,subject_id,channel_id,experiment_id,registration_date,label_value,count,mean,std,min,median,max,abs_mean,abs_std,abs_min,abs_median,abs_max
0,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,473,42609,0.000575,0.040988,-0.14319,8.483461e-44,0.149448,0.030192,0.027727,4.897538e-44,0.02276,0.149448
1,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,585,17342,0.020314,0.060305,-0.096413,0.0226709,0.142637,0.054229,0.033296,4.897538e-44,0.053708,0.142637
2,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,788,19814,0.057695,0.052597,-0.067114,0.06510509,0.193371,0.065939,0.0418,4.897538e-44,0.066069,0.193371
3,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,890,35861,-0.002529,0.051377,-0.127839,1.469402e-43,0.109212,0.043761,0.027037,4.897538e-44,0.043398,0.127839
4,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,-1,115626,0.012361,0.054212,-0.14319,0.0092644,0.193371,0.044131,0.033825,4.897538e-44,0.037158,0.193371


## Load and Compare With Dice

In [11]:
# The Dice overlap CSV is looked up in the parent directory of the distances
# directory. glob.escape() keeps glob metacharacters in the directory literal.
dice_overlap_pattern = os.path.join(
    glob.escape(os.path.dirname(INPUT_DISTANCES_PATH)),
    f"*_{channel_id}_overlap.csv",
)

# Sort so the choice is deterministic if several files match, and report a
# missing file explicitly instead of an IndexError from indexing an empty list.
dice_overlap_candidates = sorted(glob.glob(dice_overlap_pattern))
if not dice_overlap_candidates:
    raise FileNotFoundError(
        f"No Dice overlap file found matching {dice_overlap_pattern}"
    )
dice_overlap_filepath = dice_overlap_candidates[0]
dice_overlap_results = pd.read_csv(dice_overlap_filepath)

# Drop the unnamed column that appears when the CSV was written with its index.
if "Unnamed: 0" in dice_overlap_results.columns:
    del dice_overlap_results["Unnamed: 0"]

print(
    f"Loaded {dice_overlap_results.shape} results from {dice_overlap_filepath}"
)
dice_overlap_results.head()

Loaded (5, 3) results from D:\repos\allen-registration\notebooks\data\results\652506\LEVEL_3\2023.06.05\Ex_561_Em_593\labels\652506_Ex_561_Em_593_overlap.csv


Unnamed: 0,ccf_label_name,ccf_label_value,dice_score
0,all,-1,0.263239
1,VIIn,788,0.188
2,MH,473,0.678777
3,fr,585,0.228743
4,aco,890,0.134285


In [12]:
# Attach the Dice result of each label to its distance statistics. This is an
# inner join, so only labels present in both tables appear in the result.
distances_and_overlap = distance_descriptors.merge(
    dice_overlap_results,
    left_on=["label_value"],
    right_on=["ccf_label_value"],
)
distances_and_overlap.head()

Unnamed: 0,subject_id,channel_id,experiment_id,registration_date,label_value,count,mean,std,min,median,max,abs_mean,abs_std,abs_min,abs_median,abs_max,ccf_label_name,ccf_label_value,dice_score
0,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,473,42609,0.000575,0.040988,-0.14319,8.483461e-44,0.149448,0.030192,0.027727,4.897538e-44,0.02276,0.149448,MH,473,0.678777
1,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,585,17342,0.020314,0.060305,-0.096413,0.0226709,0.142637,0.054229,0.033296,4.897538e-44,0.053708,0.142637,fr,585,0.228743
2,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,788,19814,0.057695,0.052597,-0.067114,0.06510509,0.193371,0.065939,0.0418,4.897538e-44,0.066069,0.193371,VIIn,788,0.188
3,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,890,35861,-0.002529,0.051377,-0.127839,1.469402e-43,0.109212,0.043761,0.027037,4.897538e-44,0.043398,0.127839,aco,890,0.134285
4,652506,Ex_561_Em_593,LEVEL_3,2023.06.05,-1,115626,0.012361,0.054212,-0.14319,0.0092644,0.193371,0.044131,0.033825,4.897538e-44,0.037158,0.193371,all,-1,0.263239


## Save and Combine Experiment Results

In [13]:
# Write this sample's joined table next to its input distance files.
# os.path.join uses the platform's separator instead of a hard-coded backslash,
# which on non-Windows systems would become part of the file name.
sample_results_filepath = os.path.join(
    INPUT_DISTANCES_PATH, "distances_versus_overlap.csv"
)
distances_and_overlap.to_csv(sample_results_filepath)
print(f"Saved results to {sample_results_filepath}")

Saved results to D:\repos\allen-registration\notebooks\data\results\652506\LEVEL_3\2023.06.05\Ex_561_Em_593\labels\split\distances_versus_overlap.csv


In [14]:
# Table of previously saved results that this sample's rows are merged into.
COMBINED_RESULTS_FILEPATH = r"D:\repos\allen-registration\notebooks\data\results\652506\compare\distances_versus_overlap.csv"

# Load it and discard the unnamed index column, if the file carries one.
combined_results = pd.read_csv(COMBINED_RESULTS_FILEPATH).drop(
    columns="Unnamed: 0", errors="ignore"
)

combined_results.head()

Unnamed: 0,subject_id,channel_id,experiment_id,registration_date,label_value,count,mean,std,min,median,max,abs_mean,abs_std,abs_min,abs_median,abs_max,ccf_label_name,ccf_label_value,dice_score
0,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,473,43406,0.000528,0.044648,-0.099591,-0.009335111,0.159931,0.037191,0.024711,4.897538e-44,0.035195,0.159931,MH,473,0.664242
1,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,585,16732,0.006558,0.05362,-0.089793,-0.009322135,0.149957,0.043102,0.032562,4.897538e-44,0.037139,0.149957,fr,585,0.321656
2,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,788,19309,0.085237,0.06244,-0.066421,0.0875217,0.259457,0.088785,0.057284,4.897538e-44,0.087522,0.259457,VIIn,788,0.08461
3,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,890,33700,0.009891,0.065179,-0.129312,1.314278e-43,0.194552,0.053899,0.037961,8.483461e-44,0.050464,0.194552,aco,890,0.109716
4,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,-1,452588,0.018664,0.063621,-0.129312,0.0092644,0.259457,0.051846,0.041328,4.897538e-44,0.043398,0.259457,all,-1,0.248681


In [15]:
# Append this sample's rows to the previously saved results and keep one row
# per (subject, channel, experiment, registration date, label).
#
# drop_duplicates keeps the first occurrence, so a row already present in
# combined_results wins over a freshly computed row with the same key.
# NOTE(review): if re-processed results are meant to replace stored ones, this
# needs keep="last" — confirm the intended behaviour.
# NOTE(review): key values loaded from CSV may have a different dtype than the
# freshly computed ones (e.g. int vs. str subject_id), in which case rows would
# not compare equal — verify against parse_sample_filepath.
#
# reset_index gives the result a unique 0..n-1 index; without it the rows keep
# the overlapping index labels of the two concatenated frames.
combined_results = (
    pd.concat(
        [
            combined_results,
            distances_and_overlap,
        ]
    )
    .drop_duplicates(
        [
            "subject_id",
            "channel_id",
            "experiment_id",
            "registration_date",
            "label_value",
        ]
    )
    .reset_index(drop=True)
)
combined_results

Unnamed: 0,subject_id,channel_id,experiment_id,registration_date,label_value,count,mean,std,min,median,max,abs_mean,abs_std,abs_min,abs_median,abs_max,ccf_label_name,ccf_label_value,dice_score
0,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,473,43406,0.000528,0.044648,-0.099591,-0.009335111,0.159931,0.037191,0.024711,4.897538e-44,0.035195,0.159931,MH,473,0.664242
1,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,585,16732,0.006558,0.05362,-0.089793,-0.009322135,0.149957,0.043102,0.032562,4.897538e-44,0.037139,0.149957,fr,585,0.321656
2,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,788,19309,0.085237,0.06244,-0.066421,0.0875217,0.259457,0.088785,0.057284,4.897538e-44,0.087522,0.259457,VIIn,788,0.08461
3,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,890,33700,0.009891,0.065179,-0.129312,1.314278e-43,0.194552,0.053899,0.037961,8.483461e-44,0.050464,0.194552,aco,890,0.109716
4,652506,Ex_445_Em_469,ANTS_L4,2023.06.04,-1,452588,0.018664,0.063621,-0.129312,0.0092644,0.259457,0.051846,0.041328,4.897538e-44,0.043398,0.259457,all,-1,0.248681
5,652506,Ex_488_Em_525,ANTS_L4,2023.06.04,473,46257,0.004901,0.045581,-0.093219,-6.666666e-05,0.127015,0.037482,0.026396,4.897538e-44,0.033236,0.127015,MH,473,0.671484
6,652506,Ex_488_Em_525,ANTS_L4,2023.06.04,585,17688,0.003895,0.044753,-0.093916,-5.773502e-05,0.12218,0.035327,0.027749,4.897538e-44,0.030049,0.12218,fr,585,0.389515
7,652506,Ex_488_Em_525,ANTS_L4,2023.06.04,788,17441,0.115096,0.056604,-0.02276,0.123144,0.255027,0.115285,0.05622,4.897538e-44,0.123144,0.255027,VIIn,788,0.007537
8,652506,Ex_488_Em_525,ANTS_L4,2023.06.04,890,32838,0.000938,0.057538,-0.132761,-5.773502e-05,0.129702,0.047858,0.031955,4.897538e-44,0.046422,0.132761,aco,890,0.111434
9,652506,Ex_488_Em_525,ANTS_L4,2023.06.04,-1,114224,0.020432,0.064885,-0.132761,0.0092644,0.255027,0.052011,0.043846,4.897538e-44,0.041289,0.255027,all,-1,0.261528


In [16]:
# Overwrite the combined results file with the updated table.
combined_results.to_csv(path_or_buf=COMBINED_RESULTS_FILEPATH)