# On-Target Threshold
Determine the value of the hyperparameter `cnfg.ON_TARGET_THRESHOLD_DVA`, which is used to decide whether a gaze sample / fixation / visit is "on-target" or not.

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from statsmodels.sandbox.stats.stats_dhuard import percentileofscore
import plotly.io as pio

import config as cnfg

# Default Plotly renderer used by every `fig.show()` call in this notebook.
# "browser" opens each figure in the system web browser; swap to the
# commented-out "notebook" line to embed the figures inline instead.
# pio.renderers.default = "notebook"
pio.renderers.default = "browser"

### Read data

In [2]:
from pipeline.read_data import read_saved_data
# Unpack the six objects returned by `read_saved_data` for `cnfg.OUTPUT_PATH`.
# Only `idents` and `fixations` are used by the cells below; the
# underscore-prefixed names are unpacked but not referenced in the visible notebook.
_targets, _actions, _metadata, idents, fixations, _visits = read_saved_data(cnfg.OUTPUT_PATH)

### (A) Gaze on Target Identification
#### (1) Distances-from-Target when subject performed any identification
i.e. Hits or False-Alarms

In [3]:
percentiles = [0.5, 0.75, 0.85, 0.9, 0.95, 0.99]

# Every identification action that is not a miss (i.e. hits and false alarms).
not_misses = idents.loc[idents["identification_category"] != "miss"]

# Descriptive statistics of the distance-from-target: one "all" row followed by one row per subject.
dist_summary = (
    pd.concat([
        not_misses["distance_dva"].describe(percentiles).rename("all"),
        not_misses.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

# Percentage of identifications whose distance is covered by the configured threshold.
# `percentileofscore` interpolates the empirical CDF and raises when the score lies
# outside the data range, hence the explicit boundary branches. NaN distances are
# dropped and an empty sample yields NaN instead of reaching the interpolation.
ident_distances = not_misses["distance_dva"].dropna()
if ident_distances.empty:
    default_threshold_percentile = np.nan
elif ident_distances.min() > cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 0.0
elif ident_distances.max() < cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 100.0
else:
    default_threshold_percentile = float(percentileofscore(ident_distances, cnfg.ON_TARGET_THRESHOLD_DVA))

print(f"When subjects identified a target, {dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
print(f"The default threshold of {cnfg.ON_TARGET_THRESHOLD_DVA}DVA covers {default_threshold_percentile:.2f}% of the cases.")
dist_summary

When subjects identified a target, 1.46 DVA of distance covers 95% of the cases.
The default threshold of 1.75DVA covers 96.24% of the cases.


Unnamed: 0,count,mean,std,min,50%,75%,85%,90%,95%,99%,max
all,1248.0,0.827495,1.642685,0.00525,0.539752,0.84034,1.03144,1.187911,1.457244,8.40163,23.297429
2,112.0,0.887389,1.37894,0.00525,0.638451,0.94533,1.13248,1.240759,1.367437,8.195211,9.435844
12,111.0,1.377363,2.263406,0.160329,0.863651,1.245024,1.640418,1.947023,4.304638,9.81311,19.109826
13,99.0,0.782881,1.451985,0.073701,0.538058,0.731862,0.912429,1.155491,1.408013,6.12418,13.597367
14,104.0,1.307629,3.840592,0.032188,0.37025,0.633209,0.733201,0.991192,5.101486,21.035096,23.297429
15,98.0,0.809266,1.268136,0.085205,0.675779,0.865602,1.059637,1.202942,1.37785,2.032327,12.765376
16,90.0,0.423305,0.247078,0.043483,0.350909,0.640706,0.747622,0.778273,0.858737,0.915361,0.967981
17,101.0,0.764956,1.272487,0.065453,0.567267,0.825397,1.03437,1.15349,1.318819,3.75598,12.600789
18,123.0,0.442788,0.809888,0.057758,0.359878,0.493016,0.569625,0.628443,0.778915,0.995772,9.073654
19,116.0,0.792266,0.767282,0.108211,0.701126,1.014256,1.145919,1.215355,1.350801,1.971149,7.93885


In [4]:
def _ident_hover_text(frame, subject_label=None):
    """Return one hover string per row of `frame` (subject, trial, distance).

    When `subject_label` is given it is used verbatim as the first hover line;
    otherwise the first line is built from each row's own `subject` value.
    """
    def _format_row(row):
        heading = f"Subject: {row['subject']}" if subject_label is None else subject_label
        return (
            f"{heading}<br>"
            f"Trial: {row['trial']}<br>"
            f"Distance: {row['distance_dva']:.2f} DVA"
        )
    return frame.apply(_format_row, axis=1)


# Appearance shared by every violin in this figure: horizontal half-violins
# with all raw points drawn underneath and the mean line shown.
ident_violin_style = dict(
    y0="distance",
    width=1.75, orientation="h", side="positive", spanmode='hard',
    box=dict(visible=False),
    meanline=dict(visible=True),
    points="all", pointpos=-0.5,
    showlegend=True, hoverinfo="x+y+text",
)

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, shared_yaxes=False)

# Row 1: one violin pooling all subjects.
fig.add_trace(
    go.Violin(
        x=not_misses["distance_dva"],
        text=_ident_hover_text(not_misses),
        name="All Subjects", legendgroup="All Subjects",
        marker=dict(color=cnfg.get_discrete_color("all")),
        **ident_violin_style,
    ),
    row=1, col=1,
)

# Row 2: one violin per subject, in order of first appearance in the data.
for subject_id in not_misses[cnfg.SUBJECT_STR].unique():
    subject_label = f"{cnfg.SUBJECT_STR.capitalize()} {subject_id:02d}"
    subject_rows = not_misses.loc[not_misses[cnfg.SUBJECT_STR] == subject_id]
    fig.add_trace(
        go.Violin(
            x=subject_rows["distance_dva"],
            text=_ident_hover_text(subject_rows, subject_label=subject_label),
            name=subject_label, legendgroup=subject_label,
            marker=dict(color=cnfg.get_discrete_color(subject_id, loop=True), opacity=0.5),
            **ident_violin_style,
        ),
        row=2, col=1,
    )

# Cosmetics: fonts, hidden y tick labels, x grid, figure size and title.
ident_xaxis_style = dict(
    title=None, showline=False,
    showgrid=True, gridcolor=cnfg.GRID_LINE_COLOR, gridwidth=cnfg.GRID_LINE_WIDTH,
    zeroline=False, zerolinecolor=cnfg.GRID_LINE_COLOR, zerolinewidth=cnfg.ZERO_LINE_WIDTH,
    tickfont=cnfg.AXIS_TICK_FONT,
)
ident_layout_style = dict(
    width=1200, height=675,
    title=dict(text="Distance on Identification-Action", font=cnfg.TITLE_FONT),
    paper_bgcolor='rgba(0, 0, 0, 0)',
    showlegend=True,
)
fig.update_annotations(font=cnfg.AXIS_LABEL_FONT)
fig.update_yaxes(showticklabels=False)
fig.update_xaxes(**ident_xaxis_style)
fig.update_layout(**ident_layout_style)

fig.show()

#### (2) Distances-from-Target for `Hit`/`False Alarm` Identifications
We identify `hits` and `false-alarms` based on the distance of the gaze from the closest target when the subject performed an identification action. The threshold is set in `cnfg.ON_TARGET_THRESHOLD_DVA`.

In [5]:
# Identifications classified as hits.
hits = not_misses[not_misses["identification_category"] == "hit"]

# Descriptive statistics of the hit distances: one "all" row followed by one row per subject.
hit_dist_summary = (
    pd.concat([
        hits["distance_dva"].describe(percentiles).rename("all"),
        hits.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

# Percentage of hits whose distance is covered by the configured threshold.
# `percentileofscore` raises when the score lies outside the data range, hence the
# explicit boundary branches. NaN distances are dropped and an empty sample yields
# NaN instead of reaching the interpolation.
hit_distances = hits["distance_dva"].dropna()
if hit_distances.empty:
    default_threshold_percentile = np.nan
elif hit_distances.min() > cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 0.0
elif hit_distances.max() < cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 100.0
else:
    default_threshold_percentile = float(percentileofscore(hit_distances, cnfg.ON_TARGET_THRESHOLD_DVA))

print(f"For identifications classified as `hits`, {hit_dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
print(f"The default threshold of {cnfg.ON_TARGET_THRESHOLD_DVA}DVA covers {default_threshold_percentile:.2f}% of the cases.")
hit_dist_summary

For identifications classified as `hits`, 1.24 DVA of distance covers 95% of the cases.
The default threshold of 1.75DVA covers 100.00% of the cases.


Unnamed: 0,count,mean,std,min,50%,75%,85%,90%,95%,99%,max
all,1191.0,0.583955,0.342398,0.00525,0.522377,0.802773,0.95218,1.072676,1.238744,1.518726,1.712458
2,108.0,0.641263,0.366159,0.00525,0.592702,0.936236,1.064274,1.187911,1.260981,1.412605,1.438825
12,92.0,0.791264,0.363523,0.160329,0.779886,1.006058,1.15834,1.294181,1.469663,1.649469,1.680471
13,95.0,0.569296,0.300453,0.073701,0.525737,0.70652,0.833511,0.952872,1.193932,1.403128,1.517925
14,95.0,0.409261,0.268807,0.032188,0.350813,0.602102,0.646063,0.667448,0.955954,1.180261,1.207467
15,96.0,0.68847,0.348134,0.085205,0.675779,0.859317,1.031871,1.180632,1.333408,1.465944,1.700377
16,90.0,0.423305,0.247078,0.043483,0.350909,0.640706,0.747622,0.778273,0.858737,0.915361,0.967981
17,98.0,0.602429,0.306102,0.065453,0.565192,0.781139,0.915732,1.062111,1.173186,1.364848,1.398881
18,122.0,0.372043,0.20164,0.057758,0.359751,0.491214,0.568692,0.615991,0.756176,0.939437,1.009208
19,110.0,0.709812,0.346265,0.108211,0.677313,0.994107,1.094975,1.177265,1.301972,1.445614,1.525928


In [6]:
# Identifications classified as false alarms.
fas = not_misses[not_misses["identification_category"] == "false_alarm"]

# Descriptive statistics of the false-alarm distances: one "all" row followed by one row per subject.
fas_dist_summary = (
    pd.concat([
        fas["distance_dva"].describe(percentiles).rename("all"),
        fas.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

# Percentage of false alarms whose distance is covered by the configured threshold.
# `percentileofscore` raises when the score lies outside the data range, hence the
# explicit boundary branches. False alarms are rare (some subjects have none), so the
# sample may be empty: NaN distances are dropped and an empty sample yields NaN
# instead of reaching the interpolation.
fa_distances = fas["distance_dva"].dropna()
if fa_distances.empty:
    default_threshold_percentile = np.nan
elif fa_distances.min() > cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 0.0
elif fa_distances.max() < cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 100.0
else:
    default_threshold_percentile = float(percentileofscore(fa_distances, cnfg.ON_TARGET_THRESHOLD_DVA))

print(f"For identifications classified as `false alarms`, {fas_dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
print(f"The default threshold of {cnfg.ON_TARGET_THRESHOLD_DVA}DVA covers {default_threshold_percentile:.2f}% of the cases.")
fas_dist_summary

For identifications classified as `false alarms`, 18.66 DVA of distance covers 95% of the cases.
The default threshold of 1.75DVA covers 0.00% of the cases.


Unnamed: 0,count,mean,std,min,50%,75%,85%,90%,95%,99%,max
all,47.0,7.009501,5.432442,1.785373,5.971665,8.817337,12.617248,14.348471,18.656638,22.305638,23.297429
2,4.0,7.532805,2.123616,4.494146,8.100615,8.52538,8.889565,9.071658,9.253751,9.399425,9.435844
12,15.0,5.051693,4.757665,1.816095,2.261394,6.712295,8.436063,9.395747,12.699509,17.827762,19.109826
13,4.0,5.85552,5.50344,1.785373,4.01967,7.878091,10.165802,11.309657,12.453512,13.368596,13.597367
14,7.0,13.73278,7.678483,3.594204,15.475127,19.37028,21.356967,22.003788,22.650608,23.168065,23.297429
15,1.0,12.765376,,12.765376,12.765376,12.765376,12.765376,12.765376,12.765376,12.765376,12.765376
17,3.0,6.074165,5.730696,1.865727,3.75598,8.178384,9.947346,10.831827,11.716308,12.423893,12.600789
18,1.0,9.073654,,9.073654,9.073654,9.073654,9.073654,9.073654,9.073654,9.073654,9.073654
19,3.0,3.932055,3.470517,1.867956,1.98936,4.964105,6.154003,6.748952,7.343901,7.81986,7.93885
20,2.0,6.127572,0.145192,6.024906,6.127572,6.178905,6.199439,6.209705,6.219972,6.228185,6.230239


### (B) Fixation Analysis
#### (3) Distances-from-Target across all fixations

In [7]:
percentiles = [0.05, 0.25, 0.5, 0.75, 0.9, 0.95]

# Per-target distance columns are recognised by their "distance_dva" suffix.
dva_cols = [name for name in fixations.columns if name.endswith("distance_dva")]

# For every fixation keep its identifying columns plus the distance to the closest target.
closest_target_dist = fixations[dva_cols].min(axis=1).rename("distance")
min_dists = pd.concat(
    [fixations[["subject", "trial", "eye", "event"]], closest_target_dist],
    axis=1,
)

# Summary table: one "all" row followed by one row per subject.
all_fix_overall = min_dists["distance"].describe(percentiles).rename("all")
all_fix_by_subject = min_dists.groupby("subject")["distance"].describe(percentiles).T
fixation_dist_summary = pd.concat([all_fix_overall, all_fix_by_subject], axis=1).T

print("All Fixations:")
fixation_dist_summary

All Fixations:


Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,90%,95%,max
all,116947.0,8.472295,5.217201,0.006736,0.817362,4.529492,8.023733,11.757416,15.296785,17.799215,43.422
2,12482.0,7.86388,4.989668,0.014836,0.670461,3.992184,7.518766,10.957089,14.360563,16.910656,28.95004
12,11733.0,8.639695,5.282517,0.021815,0.98432,4.770353,8.085927,11.792952,15.34402,18.479218,31.039929
13,8135.0,8.547193,5.522265,0.041144,0.728294,4.449,8.025514,11.798203,15.770773,18.720618,32.371653
14,9513.0,9.016266,5.347366,0.039167,0.842865,4.987896,8.49842,12.553002,16.191683,18.38361,43.422
15,7616.0,8.245533,5.062607,0.049419,0.783031,4.280422,7.964702,11.59968,15.154948,17.057289,39.595361
16,9973.0,8.743079,4.96418,0.007104,0.937009,4.985892,8.4338,12.005936,15.403083,17.687778,27.668284
17,7815.0,8.095184,5.138432,0.015262,0.880142,4.184559,7.579997,11.337777,14.953332,17.18834,29.743946
18,11202.0,8.231489,5.129308,0.014692,0.7223,4.453289,7.606037,11.328712,15.024183,17.708345,30.078966
19,7266.0,8.019145,5.344337,0.022844,0.757255,3.716248,7.650134,11.40859,14.759613,17.316979,31.603614


#### (4) Distances-from-Target during identification-fixations
##### find identification fixations
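Fixations whose closest target was correctly identified (a `hit`) in the same trial, where either:
- the subject performed the identification action during the fixation, or
- the subject performed the identification action immediately (at most 1 second) after the fixation ended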

In [8]:
# Longest allowed gap between the end of a fixation and the identification action for the
# fixation to count as "immediately preceding" it. Expressed in the units of the
# `time` / `end_time` columns — assumed to be milliseconds (i.e. 1 second); TODO confirm.
MAX_PRECEDING_GAP = 1000

# Times of the hit identifications, keyed by subject / trial / target.
hit_times = idents.loc[
    idents["identification_category"] == "hit", ["subject", "trial", "target", "time"]
]

# Label every fixation with its closest target and the distance to it, drop the per-target
# distance columns, and attach the time at which that target was hit in the same trial
# (NaN when it was not). `assign` works on a copy, so `fixations` itself is left untouched.
per_target_dists = fixations[dva_cols]
fixs_with_ident_time = (
    fixations
    .assign(
        target=per_target_dists.idxmin(axis=1).str.replace("_distance_dva", "", regex=False),
        distance_dva=per_target_dists.min(axis=1),
    )
    .pipe(lambda df: df.drop(columns=[col for col in df.columns if "_distance_" in col]))
    .merge(hit_times, on=["subject", "trial", "target"], how="left")
)

# The identification happened while the fixation was ongoing (both ends inclusive).
fixs_with_ident_time["is_during"] = (
    (fixs_with_ident_time["start_time"] <= fixs_with_ident_time["time"])
    & (fixs_with_ident_time["time"] <= fixs_with_ident_time["end_time"])
)

# Time elapsed between the end of the fixation and the identification
# (negative when the fixation ended after the identification). Kept for inspection.
fixs_with_ident_time["end_to_ident_diff"] = fixs_with_ident_time["time"] - fixs_with_ident_time["end_time"]

# Among the fixations that ended strictly before the identification and no more than
# MAX_PRECEDING_GAP earlier, pick the closest one per subject / trial / eye / target.
# The lower bound is strict: a fixation ending exactly at the identification time is
# already flagged `is_during`, and with a gap of 0 it would otherwise always win the
# `idxmin` and displace the fixation that really preceded the identification.
gap = fixs_with_ident_time["end_to_ident_diff"]
immediately_preceding_idxs = (
    fixs_with_ident_time
    .loc[(gap > 0) & (gap <= MAX_PRECEDING_GAP)]
    .groupby(["subject", "trial", "eye", "target"])["end_to_ident_diff"]
    .idxmin()
    .to_numpy()
)
# The merge above leaves a fresh unique index, so membership of the selected labels
# marks exactly the chosen rows (and also copes with an empty selection).
fixs_with_ident_time["is_immediately_preceding"] = fixs_with_ident_time.index.isin(immediately_preceding_idxs)

In [9]:
# Fixations during which the identification action took place.
ident_fixs = fixs_with_ident_time[fixs_with_ident_time["is_during"]]

# Summary table: one "all" row followed by one row per subject.
ident_fix_overall = ident_fixs["distance_dva"].describe(percentiles).rename("all")
ident_fix_by_subject = ident_fixs.groupby("subject")["distance_dva"].describe(percentiles).T
ident_fixs_dist_summary = pd.concat([ident_fix_overall, ident_fix_by_subject], axis=1).T

ident_fix_p95 = ident_fixs_dist_summary.loc['all', '95%']
print("Identification Fixations:")
print(f"When subjects fixated on a target and identified it during the fixation, {ident_fix_p95:.2f} DVA of distance covers 95% of the cases.")
ident_fixs_dist_summary

Identification Fixations:
When subjects fixated on a target and identified it during the fixation, 1.20 DVA of distance covers 95% of the cases.


Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,90%,95%,max
all,2332.0,0.562374,0.336541,0.006736,0.121325,0.313937,0.492921,0.763477,1.030108,1.196082,2.338724
2,202.0,0.619955,0.3361,0.038549,0.138365,0.349042,0.585863,0.868512,1.074911,1.170053,1.484959
12,182.0,0.64682,0.371051,0.021815,0.153902,0.358095,0.59228,0.868074,1.195264,1.310916,1.630773
13,188.0,0.579055,0.313633,0.041144,0.159027,0.35952,0.507014,0.778303,1.061292,1.172341,1.499542
14,189.0,0.409908,0.236709,0.042179,0.108185,0.224672,0.377349,0.542055,0.723189,0.816611,1.366193
15,188.0,0.665742,0.336132,0.049419,0.209048,0.40986,0.624934,0.88696,1.082134,1.26717,2.098332
16,180.0,0.39228,0.219487,0.039001,0.082794,0.215285,0.348203,0.565713,0.710165,0.766271,0.974541
17,192.0,0.568567,0.28798,0.027277,0.186735,0.341129,0.530521,0.729324,0.9833,1.111499,1.534545
18,235.0,0.448369,0.271079,0.014692,0.088707,0.26437,0.404838,0.577704,0.861596,0.981293,1.543633
19,217.0,0.673631,0.367731,0.022844,0.187287,0.38236,0.641832,0.951275,1.150034,1.270743,1.896369


#### (5) Distances-from-Target during pre-identification-fixations
Distance from the closest target for fixations that ended shortly (at most 1 second) before the identification action on that target.

In [10]:
# Fixations flagged as immediately preceding an identification action.
preceding_fixs = fixs_with_ident_time[fixs_with_ident_time["is_immediately_preceding"]]

# Summary table: one "all" row followed by one row per subject.
preceding_fix_overall = preceding_fixs["distance_dva"].describe(percentiles).rename("all")
preceding_fix_by_subject = preceding_fixs.groupby("subject")["distance_dva"].describe(percentiles).T
preceding_fixs_dist_summary = pd.concat([preceding_fix_overall, preceding_fix_by_subject], axis=1).T

preceding_fix_p95 = preceding_fixs_dist_summary.loc['all', '95%']
print("Preceding Identification Fixations:")
print(f"When subjects fixated on a target and identified it immediately after the fixation, {preceding_fix_p95:.2f} DVA of distance covers 95% of the cases.")
preceding_fixs_dist_summary

Preceding Identification Fixations:
When subjects fixated on a target and identified it immediately after the fixation, 4.04 DVA of distance covers 95% of the cases.


Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,90%,95%,max
all,2308.0,1.354491,1.815415,0.007757,0.217856,0.530124,0.899552,1.459119,2.391826,4.037736,19.686305
2,209.0,1.557768,1.878667,0.038814,0.224131,0.560671,0.966678,1.693945,3.35842,5.43057,11.76853
12,179.0,1.67762,2.171371,0.028722,0.37063,0.644388,1.121407,1.645219,2.57545,6.490197,15.175831
13,186.0,0.946847,0.644608,0.057448,0.250108,0.534686,0.828726,1.201802,1.596507,1.993493,5.284142
14,177.0,1.638309,2.509709,0.039167,0.248053,0.502921,0.874122,1.724616,3.533598,4.793741,16.502108
15,188.0,1.954133,3.06051,0.076795,0.240814,0.567772,0.948536,1.84852,3.865199,6.443325,16.967226
16,172.0,1.495051,2.322052,0.09216,0.20342,0.547694,0.926373,1.464096,2.931335,4.588137,19.686305
17,188.0,1.922438,1.998972,0.1217,0.32368,0.747241,1.365299,2.122593,4.405687,6.945506,10.451534
18,233.0,0.883444,0.755728,0.036474,0.143328,0.402359,0.704171,1.033216,1.734441,2.465943,4.267241
19,211.0,1.376119,1.610629,0.039494,0.255515,0.668981,1.077622,1.516658,2.109664,3.630567,14.663008


### Visualize

In [11]:
# One figure column per entry: (column title, boolean flag column selecting the fixations).
# A flag of None keeps every fixation.
fixation_panels = [
    ("All Fixations", None),
    ("Co-Occurring with Identification", "is_during"),
    ("Preceding Identification", "is_immediately_preceding"),
]
column_titles = [panel_title for panel_title, _flag in fixation_panels]


def _fixation_hover_text(frame, subject_label=None):
    """Return one hover string per row of `frame` (subject, trial, target, distance).

    When `subject_label` is given it is used verbatim as the first hover line;
    otherwise the first line is built from each row's own `subject` value.
    """
    def _format_row(row):
        heading = f"Subject: {row['subject']}" if subject_label is None else subject_label
        return (
            f"{heading}<br>"
            f"Trial: {row['trial']}<br>"
            f"Target: {row['target']}<br>"
            f"Distance: {row['distance_dva']:.2f} DVA"
        )
    return frame.apply(_format_row, axis=1)


# Appearance shared by every violin in this figure: horizontal half-violins
# with all raw points drawn underneath and the mean line shown.
fixation_violin_style = dict(
    y0="distance",
    width=1.75, orientation="h", side="positive", spanmode='hard',
    box=dict(visible=False),
    meanline=dict(visible=True),
    points="all", pointpos=-0.5,
    hoverinfo="x+y+text",
)

fig = make_subplots(
    rows=2, cols=len(column_titles), column_titles=column_titles,
    shared_xaxes=True, shared_yaxes=False,
)

for panel_col, (panel_title, flag_col) in enumerate(fixation_panels, start=1):
    if flag_col is None:
        panel_data = fixs_with_ident_time
    else:
        panel_data = fixs_with_ident_time[fixs_with_ident_time[flag_col]]
    # Legend entries are emitted by the first column only; later columns reuse the legend groups.
    in_first_panel = panel_col == 1

    # Row 1: one violin pooling all subjects.
    fig.add_trace(
        go.Violin(
            x=panel_data["distance_dva"],
            text=_fixation_hover_text(panel_data),
            name="All Subjects", legendgroup="All Subjects",
            marker=dict(color=cnfg.get_discrete_color("all")),
            showlegend=in_first_panel,
            **fixation_violin_style,
        ),
        row=1, col=panel_col,
    )

    # Row 2: one violin per subject, in order of first appearance in the panel's data.
    for subject_id in panel_data[cnfg.SUBJECT_STR].unique():
        subject_label = f"{cnfg.SUBJECT_STR.capitalize()} {subject_id:02d}"
        subject_rows = panel_data.loc[panel_data[cnfg.SUBJECT_STR] == subject_id]
        fig.add_trace(
            go.Violin(
                x=subject_rows["distance_dva"],
                text=_fixation_hover_text(subject_rows, subject_label=subject_label),
                name=subject_label, legendgroup=subject_label,
                marker=dict(color=cnfg.get_discrete_color(subject_id, loop=True), opacity=0.5),
                showlegend=in_first_panel,
                **fixation_violin_style,
            ),
            row=2, col=panel_col,
        )

# Cosmetics: fonts, hidden y tick labels, x grid, figure size and title.
fixation_xaxis_style = dict(
    title=None, showline=False,
    showgrid=True, gridcolor=cnfg.GRID_LINE_COLOR, gridwidth=cnfg.GRID_LINE_WIDTH,
    zeroline=False, zerolinecolor=cnfg.GRID_LINE_COLOR, zerolinewidth=cnfg.ZERO_LINE_WIDTH,
    tickfont=cnfg.AXIS_TICK_FONT,
)
fixation_layout_style = dict(
    width=1400, height=650,
    title=dict(text="Distance on Fixations", font=cnfg.TITLE_FONT),
    paper_bgcolor='rgba(0, 0, 0, 0)',
    showlegend=True,
)
fig.update_annotations(font=cnfg.AXIS_LABEL_FONT)
fig.update_yaxes(showticklabels=False)
fig.update_xaxes(**fixation_xaxis_style)
fig.update_layout(**fixation_layout_style)

fig.show()