In [None]:
import numpy as np
import plotly.io as pio

import Config.constants as cnst
from Analysis.detector_comparison.DetectorComparisonAnalyzer import DetectorComparisonAnalyzer
from Visualization.distributions_grid import distributions_grid
from Visualization.p_value_heatmap import heatmap_grid

# Make "notebook" the default Plotly renderer for every `.show()` call in this notebook.
pio.renderers.default = "notebook"

In [ ]:
# --- Dataset loading ---
DATASET = "Lund2013"


def _truncate_at_ector(col):
    """Return `col` cut just before its first "ector"; return it unchanged if absent."""
    if "ector" not in col:
        return col
    return col[:col.index("ector")]


# Passed to the preprocessing step as its `column_mapper`.
COL_MAPPER = _truncate_at_ector
VERBOSE = True

# --- Statistical testing (test name for the comparison, threshold/correction for the heatmaps) ---
STAT_TEST_NAME = "Mann-Whitney"
CRITICAL_VALUE = 0.05
CORRECTION = "bonferroni"

# --- Toggles for which figure families get rendered ---
SHOW_DISTRIBUTIONS = True
SHOW_P_VALUES = True

In [None]:
%%capture --no-stdout

samples, events, _, event_matches, comparison_columns = DetectorComparisonAnalyzer.preprocess_dataset(DATASET,
                                                                                    column_mapper=COL_MAPPER,
                                                                                    verbose=VERBOSE)
comparison_columns

In [None]:
# Run the analysis once, then pull the four result groups used below out by key.
all_event_metrics = DetectorComparisonAnalyzer.analyze(events, event_matches, samples, verbose=VERBOSE)
(
    sample_metrics,
    event_features,
    event_matching_ratios,
    event_matching_feature_diffs,
) = (
    all_event_metrics[group_name]
    for group_name in (
        "Sample Metrics",
        "Event Features",
        "Event Matching Ratios",
        "Event Matching Feature Diffs",
    )
)

# Sample-Level Metrics

In [None]:
# List the keys of the sample-level metrics that can be plotted.
available_sample_metrics = list(sample_metrics.keys())
print(f"Available sample metrics: {available_sample_metrics}")

In [None]:
if SHOW_DISTRIBUTIONS:
    # One grid figure per sample-level metric, restricted to the comparison columns.
    for metric in sample_metrics.keys():
        # Metrics whose name contains "Transition Matrix" pass no PDF bounds;
        # every other metric passes 0 and 1.
        is_transition_matrix = "Transition Matrix" in metric
        sample_metric_fig = distributions_grid(
            data=sample_metrics[metric][comparison_columns],
            title=f"{DATASET.upper()}:\t\tSample-Level {metric.title()}",
            pdf_min_val=None if is_transition_matrix else 0,
            pdf_max_val=None if is_transition_matrix else 1,
            column_title_mapper=lambda col: f"{col[0]}→{col[1]}",
        )
        sample_metric_fig.show()

# Feature Distributions

In [None]:
# List the keys of the event features that can be plotted and tested.
available_event_features = list(event_features.keys())
print(f"Available event features: {available_event_features}")

In [None]:
if SHOW_DISTRIBUTIONS:
    # One grid figure per event feature; "Counts" gets its own title wording
    # and is the only feature plotted with `show_counts` enabled.
    for feature in event_features.keys():
        is_counts = feature == "Counts"
        title = (
            f"{DATASET.upper()}:\t\tEvent {feature.title()}"
            if is_counts
            else f"{DATASET.upper()}:\t\tEvents' {feature.title()} Distribution"
        )
        feature_fig = distributions_grid(
            data=event_features[feature],
            title=title,
            show_counts=is_counts,
        )
        feature_fig.show()

In [ ]:
if SHOW_P_VALUES:
    def _without_nans(cell):
        """Return the entries of `cell` as a list, leaving out NaN values."""
        return [value for value in cell if not np.isnan(value)]

    # One p-value heatmap per event feature; "Counts" is skipped.
    for feature in event_features.keys():
        if feature != "Counts":
            # Strip NaNs from every cell before running the statistical test.
            feature_values = event_features[feature].map(_without_nans)
            stat_test_res = DetectorComparisonAnalyzer.event_feature_statistical_comparison(
                feature_values, STAT_TEST_NAME
            )
            # Keep only the p-value entries of the test results.
            p_values = stat_test_res.xs(cnst.P_VALUE, axis=1, level=2)
            p_value_fig = heatmap_grid(
                p_values,
                critical_value=CRITICAL_VALUE,
                correction=CORRECTION,
                add_annotations=True,
                ignore_above_critical=True,
            )
            p_value_fig.show()

# Event Matching Ratio

In [None]:
if SHOW_DISTRIBUTIONS:
    # Single grid figure of the "Match Ratio" table, restricted to the comparison columns.
    match_ratio_data = event_matching_ratios["Match Ratio"][comparison_columns]
    match_ratio_fig = distributions_grid(
        data=match_ratio_data,
        title=f"{DATASET.upper()}:\t\tEvent-Matching Ratios",
        pdf_min_val=0,
        pdf_max_val=100,
        column_title_mapper=lambda col: f"{col[0]}→{col[1]}",
    )
    match_ratio_fig.show()

# Matched-Events' Feature Differences

In [0]:
# List the keys of the matched-event feature differences that can be plotted.
available_feature_diffs = list(event_matching_feature_diffs.keys())
print(f"Available matched-event feature differences: {available_feature_diffs}")

In [None]:
if SHOW_DISTRIBUTIONS:
    # One grid figure per matched-event feature difference, restricted to the comparison columns.
    for feature in event_matching_feature_diffs.keys():
        # Only "IoU" passes PDF bounds (0 and 1); every other feature passes None.
        is_iou = feature == "IoU"
        feature_diff_fig = distributions_grid(
            data=event_matching_feature_diffs[feature][comparison_columns],
            title=f"{DATASET.upper()}:\t\tMatched-Events' {feature.title()} Distribution",
            column_title_mapper=lambda col: f"{col[0]}→{col[1]}",
            pdf_min_val=0 if is_iou else None,
            pdf_max_val=1 if is_iou else None,
        )
        feature_diff_fig.show()