# Sample-Level Comparison
## Dataset: IRF
(1) **Levenshtein Distance**: Measuring the edit distance between two sequences of sample-labels.  
(2) **Cohen's Kappa**: Measuring the agreement between two sequences of sample-labels.  
(3) **Transition-Matrices' Frobenius Norm**: Computing the transition matrix of each sequence of labels and measuring the Frobenius Norm of the difference between the two matrices.  
(4) **Transition-Matrices' Kullback-Leibler Divergence**: Computing the transition matrix of each sequence of labels, calculating each matrix's left eigenvector associated with eigenvalue 1 (normalized to sum to 1, this is the transition matrix's stationary distribution), and measuring the Kullback-Leibler Divergence between the two stationary distributions.

In [ ]:
import Config.constants as cnst

from GazeDetectors.EngbertDetector import EngbertDetector
from GazeDetectors.NHDetector import NHDetector
from GazeDetectors.REMoDNaVDetector import REMoDNaVDetector
from DataSetLoaders.DataSetFactory import DataSetFactory

import Analysis.comparisons as detector_comparisons
import Analysis.figures as figs

In [ ]:
%%capture --no-stdout

DATASET_NAME = "IRF"
RATERS = ["RZ"]
DETECTORS = [EngbertDetector(), NHDetector(), REMoDNaVDetector()]
COMPARISON_COLUMNS = [(r, d.name) for r in RATERS for d in DETECTORS]
EVENT_MATCHING_PARAMS = {"match_by": "onset", "max_onset_latency": 15, "allow_cross_matching": False}

samples_df, events_df = DataSetFactory.load_and_process(DATASET_NAME, RATERS, DETECTORS)

In [ ]:
# Levenshtein ("lev") comparison of sample labels, grouped by cnst.STIMULUS.
samples_levenshtein_grouped = detector_comparisons.compare_samples(
    samples=samples_df,
    metric="lev",
    group_by=cnst.STIMULUS,
)

# Violin plots restricted to the (rater, detector) columns of interest;
# each subplot is titled "<rater>→<detector>".
lev_distribution_fig = figs.distributions_grid(
    samples_levenshtein_grouped[COMPARISON_COLUMNS],
    plot_type="violin",
    title="Levenshtein Distance Distribution",
    column_title_mapper=lambda col: f"{col[0]}→{col[1]}",
)
lev_distribution_fig.show()

In [ ]:
# Cohen's Kappa ("kappa") comparison of sample labels, grouped by cnst.STIMULUS.
samples_kappa_grouped = detector_comparisons.compare_samples(
    samples=samples_df,
    metric="kappa",
    group_by=cnst.STIMULUS,
)

# Violin plots restricted to the (rater, detector) columns of interest;
# each subplot is titled "<rater>→<detector>".
kappa_distribution_fig = figs.distributions_grid(
    samples_kappa_grouped[COMPARISON_COLUMNS],
    plot_type="violin",
    title="Cohen's Kappa Distribution",
    column_title_mapper=lambda col: f"{col[0]}→{col[1]}",
)
kappa_distribution_fig.show()

In [ ]:
# Frobenius-norm ("frobenius") comparison of sample labels, grouped by
# cnst.STIMULUS.
samples_frobenius_grouped = detector_comparisons.compare_samples(
    samples=samples_df,
    metric="frobenius",
    group_by=cnst.STIMULUS,
)

# Violin plots restricted to the (rater, detector) columns of interest;
# each subplot is titled "<rater>→<detector>".
frob_distribution_fig = figs.distributions_grid(
    samples_frobenius_grouped[COMPARISON_COLUMNS],
    plot_type="violin",
    title="Frobenius Norm Distribution",
    column_title_mapper=lambda col: f"{col[0]}→{col[1]}",
)
frob_distribution_fig.show()

In [ ]:
# Kullback-Leibler ("kl") comparison of sample labels, grouped by
# cnst.STIMULUS.
samples_kl_grouped = detector_comparisons.compare_samples(
    samples=samples_df,
    metric="kl",
    group_by=cnst.STIMULUS,
)

# Violin plots restricted to the (rater, detector) columns of interest;
# each subplot is titled "<rater>→<detector>".
kl_distribution_fig = figs.distributions_grid(
    samples_kl_grouped[COMPARISON_COLUMNS],
    plot_type="violin",
    title="Kullback-Leibler Divergence Distribution",
    column_title_mapper=lambda col: f"{col[0]}→{col[1]}",
)
kl_distribution_fig.show()