# On-Target Threshold
Determine the value of hyperparameter `cnfg.ON_TARGET_THRESHOLD`, which is used to decide if a gaze sample / fixation / visit is "on-target" or not.

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from statsmodels.sandbox.stats.stats_dhuard import percentileofscore
import plotly.io as pio

import config as cnfg

# pio.renderers.default = "notebook"
pio.renderers.default = "browser"

### Read data

In [2]:
from preprocess.read_data import read_saved_data
_targets, _actions, _metadata, idents, fixations, _visits = read_saved_data(cnfg.OUTPUT_PATH)

### (A) Gaze on Target Identification
#### (1) Distances-from-Target when subject performed any identification
i.e. Hits or False-Alarms

In [16]:
percentiles = [0.5, 0.75, 0.85, 0.9, 0.95, 0.99,]

not_misses = idents.loc[idents["identification_category"] != "miss"]
dist_summary = (
    pd.concat([
        not_misses["distance_dva"].describe(percentiles).rename("all"),
        not_misses.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

if not_misses['distance_dva'].min() > cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 0.0
elif not_misses['distance_dva'].max() < cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 100.0
else:
    default_threshold_percentile = percentileofscore(not_misses['distance_dva'], cnfg.ON_TARGET_THRESHOLD_DVA)

print(f"When subjects identified a target, {dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
print(f"The default threshold of {cnfg.ON_TARGET_THRESHOLD_DVA}DVA covers {default_threshold_percentile:.2f}% of the cases.")
dist_summary

When subjects identified a target, 1.46 DVA of distance covers 95% of the cases.
The default threshold of 1.0DVA covers 96.91% of the cases.


Unnamed: 0,count,mean,std,min,50%,75%,85%,90%,95%,99%,max
all,1248.0,0.827495,1.642685,0.00525,0.539752,0.84034,1.03144,1.187911,1.457244,8.40163,23.297429
2,112.0,0.887389,1.37894,0.00525,0.638451,0.94533,1.13248,1.240759,1.367437,8.195211,9.435844
12,111.0,1.377363,2.263406,0.160329,0.863651,1.245024,1.640418,1.947023,4.304638,9.81311,19.109826
13,99.0,0.782881,1.451985,0.073701,0.538058,0.731862,0.912429,1.155491,1.408013,6.12418,13.597367
14,104.0,1.307629,3.840592,0.032188,0.37025,0.633209,0.733201,0.991192,5.101486,21.035096,23.297429
15,98.0,0.809266,1.268136,0.085205,0.675779,0.865602,1.059637,1.202942,1.37785,2.032327,12.765376
16,90.0,0.423305,0.247078,0.043483,0.350909,0.640706,0.747622,0.778273,0.858737,0.915361,0.967981
17,101.0,0.764956,1.272487,0.065453,0.567267,0.825397,1.03437,1.15349,1.318819,3.75598,12.600789
18,123.0,0.442788,0.809888,0.057758,0.359878,0.493016,0.569625,0.628443,0.778915,0.995772,9.073654
19,116.0,0.792266,0.767282,0.108211,0.701126,1.014256,1.145919,1.215355,1.350801,1.971149,7.93885


In [4]:
fig = make_subplots(
    rows=2, cols=1, shared_xaxes=True, shared_yaxes=False,
)

# top: distribution across all subjects
fig.add_trace(
    row=1, col=1, trace=go.Violin(
        y0="distance", x=not_misses["distance_dva"],
        name="All Subjects", legendgroup="All Subjects",
        text=not_misses.apply(
            lambda row: f"Subject: {row['subject']}<br>"
                        f"Trial: {row['trial']}<br>"
                        # f"Target: {row['target']}<br>"
                        f"Distance: {row['distance_dva']:.2f} DVA",
            axis=1
        ),
        marker=dict(color=cnfg.get_discrete_color("all")),
        width=1.75, orientation="h", side="positive", spanmode='hard',
        box=dict(visible=False),
        meanline=dict(visible=True),
        points="all", pointpos=-0.5,
        showlegend=True, hoverinfo="x+y+text",

    )
)

# bottom: distribution per subject
for subj_id in not_misses[cnfg.SUBJECT_STR].unique():
    subj_string = f"{cnfg.SUBJECT_STR.capitalize()} {subj_id:02d}"
    subj_data = not_misses[not_misses[cnfg.SUBJECT_STR] == subj_id]
    texts = subj_data.apply(
        lambda row: f"{subj_string}<br>"
                    f"Trial: {row['trial']}<br>"
                    # f"Target: {row['target']}<br>"
                    f"Distance: {row['distance_dva']:.2f} DVA",
        axis=1
    )
    fig.add_trace(
        row=2, col=1, trace=go.Violin(
            y0="distance", x=subj_data["distance_dva"],
            text=texts,
            name=subj_string, legendgroup=subj_string,
            marker=dict(color=cnfg.get_discrete_color(subj_id, loop=True), opacity=0.5),
            width=1.75, orientation="h", side="positive", spanmode='hard',
            box=dict(visible=False),
            meanline=dict(visible=True),
            points="all", pointpos=-0.5,
            showlegend=True, hoverinfo="x+y+text"
        )
    )

# update visuals
fig.update_annotations(font=cnfg.AXIS_LABEL_FONT)
fig.update_yaxes(showticklabels=False)  # Hide y-axis labels
fig.update_xaxes(
    title=None, showline=False,
    showgrid=True, gridcolor=cnfg.GRID_LINE_COLOR, gridwidth=cnfg.GRID_LINE_WIDTH,
    zeroline=False, zerolinecolor=cnfg.GRID_LINE_COLOR, zerolinewidth=cnfg.ZERO_LINE_WIDTH,
    tickfont=cnfg.AXIS_TICK_FONT,
)
fig.update_layout(
    width=1200, height=675,
    title=dict(text="Distance on Identification-Action", font=cnfg.TITLE_FONT),
    paper_bgcolor='rgba(0, 0, 0, 0)',
    # plot_bgcolor='rgba(0, 0, 0, 0)',
    showlegend=True,
)

fig.show()

##### (2) Distances-from-Target for `Hit`/`False Alarm` Identifications
We identify `hits` and `false-alarms` based on the distance of the gaze from the closest target when the subject performed an identification action. The threshold is set in `cnfg.ON_TARGET_THRESHOLD_DVA`.

In [5]:
hits = not_misses[not_misses["identification_category"] == "hit"]
hit_dist_summary = (
    pd.concat([
        hits["distance_dva"].describe(percentiles).rename("all"),
        hits.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

if hits['distance_dva'].min() > cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 0.0
elif hits['distance_dva'].max() < cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 100.0
else:
    default_threshold_percentile = percentileofscore(hits['distance_dva'], cnfg.ON_TARGET_THRESHOLD_DVA)

print(f"For identifications classified as `hits`, {hit_dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
print(f"The default threshold of {cnfg.ON_TARGET_THRESHOLD_DVA}DVA covers {default_threshold_percentile:.2f}% of the cases.")
hit_dist_summary

For identifications classified as `hits`, 0.91 DVA of distance covers 95% of the cases.
The default threshold of 1.0DVA covers 100.00% of the cases.


Unnamed: 0,count,mean,std,min,50%,75%,85%,90%,95%,99%,max
all,1036.0,0.487137,0.243059,0.00525,0.452607,0.672949,0.783795,0.840587,0.91068,0.96936,0.997461
2,86.0,0.503412,0.265203,0.00525,0.493341,0.739963,0.810003,0.886413,0.936955,0.9697,0.997461
12,69.0,0.626847,0.229501,0.160329,0.599416,0.834283,0.893497,0.930264,0.973933,0.988969,0.994624
13,87.0,0.504666,0.215062,0.073701,0.503525,0.667704,0.730367,0.794001,0.872803,0.959075,0.967792
14,92.0,0.379656,0.224606,0.032188,0.349656,0.574037,0.634725,0.648207,0.721916,0.95171,0.970102
15,81.0,0.571915,0.242473,0.085205,0.625673,0.80172,0.833117,0.856174,0.894535,0.969408,0.996206
16,90.0,0.423305,0.247078,0.043483,0.350909,0.640706,0.747622,0.778273,0.858737,0.915361,0.967981
17,85.0,0.513808,0.214833,0.065453,0.489474,0.671192,0.751508,0.834215,0.876605,0.952121,0.964785
18,121.0,0.366777,0.193872,0.057758,0.359623,0.487384,0.564431,0.61266,0.736293,0.900988,0.948136
19,83.0,0.554988,0.234826,0.108211,0.554888,0.738363,0.827007,0.869774,0.932544,0.953174,0.96083


In [12]:
fas = not_misses[not_misses["identification_category"] == "false_alarm"]
fas_dist_summary = (
    pd.concat([
        fas["distance_dva"].describe(percentiles).rename("all"),
        fas.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

if fas['distance_dva'].min() > cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 0.0
elif fas['distance_dva'].max() < cnfg.ON_TARGET_THRESHOLD_DVA:
    default_threshold_percentile = 100.0
else:
    default_threshold_percentile = percentileofscore(fas['distance_dva'], cnfg.ON_TARGET_THRESHOLD_DVA)

print(f"For identifications classified as `false alarms`, {fas_dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
print(f"The default threshold of {cnfg.ON_TARGET_THRESHOLD_DVA}DVA covers {default_threshold_percentile:.2f}% of the cases.")
fas_dist_summary

For identifications classified as `false alarms`, 9.31 DVA of distance covers 95% of the cases.
The default threshold of 1.0DVA covers 81.45% of the cases.


Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,90%,95%,max
all,208.0,2.527581,3.531862,1.00023,1.013858,1.106971,1.258304,1.637425,6.148269,9.309077,23.297429
2,26.0,2.157467,2.453285,1.004255,1.015694,1.086752,1.200634,1.317836,6.236742,8.161253,9.435844
12,41.0,2.655257,3.368085,1.040358,1.048264,1.153475,1.408372,2.000871,6.113139,8.56102,19.109826
13,12.0,2.799934,3.656573,1.005788,1.083031,1.196029,1.395568,1.855949,5.581266,9.403231,13.597367
14,11.0,9.153417,8.703243,1.00023,1.086071,1.192996,5.367477,16.537164,21.14136,22.219395,23.297429
15,17.0,1.940172,2.794903,1.001671,1.033885,1.165185,1.231515,1.398837,1.552314,3.913377,12.765376
17,16.0,2.099178,2.877518,1.03437,1.042891,1.106329,1.196332,1.372566,2.810853,5.967182,12.600789
18,2.0,5.041431,5.702425,1.009208,1.41243,3.025319,5.041431,7.057542,8.267209,8.670432,9.073654
19,31.0,1.441355,1.227889,1.005199,1.0146,1.064167,1.164273,1.32303,1.525928,1.928658,7.93885
20,16.0,1.747949,1.713233,1.003858,1.0039,1.025143,1.118335,1.245145,3.675841,6.076239,6.230239


### (B) Fixation Analysis
#### (3) Distances-from-Target across all fixations

In [7]:
percentiles = [0.05, 0.25, 0.5, 0.75, 0.9, 0.95]

dva_cols = [col for col in fixations.columns if col.endswith("distance_dva")]
min_dists = pd.concat([fixations[["subject", "trial", "eye", "event"]], fixations[dva_cols].min(axis=1).rename("distance")], axis=1)
fixation_dist_summary = (
    pd.concat([
        min_dists["distance"].describe(percentiles).rename("all"),
        min_dists.groupby("subject")["distance"].describe(percentiles).T,
    ], axis=1)
).T

print("All Fixations:")
fixation_dist_summary

All Fixations:


Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,90%,95%,max
all,116947.0,8.472295,5.217201,0.006736,0.817362,4.529492,8.023733,11.757416,15.296785,17.799215,43.422
2,12482.0,7.86388,4.989668,0.014836,0.670461,3.992184,7.518766,10.957089,14.360563,16.910656,28.95004
12,11733.0,8.639695,5.282517,0.021815,0.98432,4.770353,8.085927,11.792952,15.34402,18.479218,31.039929
13,8135.0,8.547193,5.522265,0.041144,0.728294,4.449,8.025514,11.798203,15.770773,18.720618,32.371653
14,9513.0,9.016266,5.347366,0.039167,0.842865,4.987896,8.49842,12.553002,16.191683,18.38361,43.422
15,7616.0,8.245533,5.062607,0.049419,0.783031,4.280422,7.964702,11.59968,15.154948,17.057289,39.595361
16,9973.0,8.743079,4.96418,0.007104,0.937009,4.985892,8.4338,12.005936,15.403083,17.687778,27.668284
17,7815.0,8.095184,5.138432,0.015262,0.880142,4.184559,7.579997,11.337777,14.953332,17.18834,29.743946
18,11202.0,8.231489,5.129308,0.014692,0.7223,4.453289,7.606037,11.328712,15.024183,17.708345,30.078966
19,7266.0,8.019145,5.344337,0.022844,0.757255,3.716248,7.650134,11.40859,14.759613,17.316979,31.603614


#### (4) Distances-from-Target during identification-fixations
##### find identification fixations
fixations where either:
- the subject performed an identification action during the fixation
- the subject performed an identification action immediately after the fixation

In [8]:
fixs_with_ident_time = fixations.copy()
fixs_with_ident_time["target"] = fixs_with_ident_time[dva_cols].idxmin(axis=1).str.replace("_distance_dva", "")
fixs_with_ident_time["distance_dva"] = fixs_with_ident_time[dva_cols].min(axis=1)
fixs_with_ident_time = (
    fixs_with_ident_time
    .drop(columns=[col for col in fixs_with_ident_time.columns if "_distance_" in col])
    .merge(
        idents.loc[
            idents["identification_category"] == "hit", ["subject", "trial", "target", "time"]
        ], on=["subject", "trial", "target"], how="left"
    )
)

fixs_with_ident_time.loc[:, "is_during"] = (fixs_with_ident_time["start_time"] <= fixs_with_ident_time["time"]) & (fixs_with_ident_time["time"] <= fixs_with_ident_time["end_time"])

fixs_with_ident_time.loc[:, "end_to_ident_diff"] = fixs_with_ident_time["time"] - fixs_with_ident_time["end_time"]
fixs_with_ident_time.loc[:, "is_immediately_preceding"] = False
immediately_preceding_idxs = (
    fixs_with_ident_time
    .loc[(0 <= fixs_with_ident_time["end_to_ident_diff"]) & (fixs_with_ident_time["end_to_ident_diff"] <= 1000)]    # max 1 sec
    .groupby(["subject", "trial", "eye", "target"])["end_to_ident_diff"]
    .idxmin()
    .values
)
fixs_with_ident_time.loc[immediately_preceding_idxs, "is_immediately_preceding"] = True
# fixs_with_ident_time.drop(columns=["end_to_ident_diff"], inplace=True)

In [9]:
ident_fixs = fixs_with_ident_time.loc[fixs_with_ident_time["is_during"]]
ident_fixs_dist_summary = (
    pd.concat([
        ident_fixs["distance_dva"].describe(percentiles).rename("all"),
        ident_fixs.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

print("Identification Fixations:")
print(f"When subjects fixated on a target and identified it during the fixation, {ident_fixs_dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
ident_fixs_dist_summary

Identification Fixations:
When subjects fixated on a target and identified it during the fixation, 0.98 DVA of distance covers 95% of the cases.


Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,90%,95%,max
all,2029.0,0.496424,0.277169,0.006736,0.112938,0.28906,0.454703,0.666247,0.876305,0.981461,2.338724
2,160.0,0.53003,0.288054,0.038549,0.131345,0.288115,0.489816,0.765306,0.941329,1.006351,1.297531
12,136.0,0.527307,0.262566,0.021815,0.142911,0.335189,0.495572,0.708934,0.906566,0.972123,1.217615
13,172.0,0.530976,0.271928,0.041144,0.150215,0.347978,0.47765,0.680736,0.906162,1.056087,1.344511
14,183.0,0.392083,0.215216,0.042179,0.106613,0.222227,0.367571,0.530472,0.699638,0.758516,1.366193
15,160.0,0.57759,0.25445,0.049419,0.207063,0.375086,0.567302,0.782218,0.913637,0.969219,1.108418
16,180.0,0.39228,0.219487,0.039001,0.082794,0.215285,0.348203,0.565713,0.710165,0.766271,0.974541
17,166.0,0.519017,0.237223,0.027277,0.180389,0.329269,0.51046,0.669308,0.82345,0.926374,1.127256
18,233.0,0.445753,0.270403,0.014692,0.087837,0.264321,0.404707,0.5693,0.84909,0.984403,1.543633
19,163.0,0.561783,0.29023,0.022844,0.178646,0.346355,0.498303,0.760929,0.958697,1.070595,1.620089


#### (5) Distances-from-Target during pre-identification-fixations
Distance from target for fixations that immediately precede an identification fixation.

In [10]:
preceding_fixs = fixs_with_ident_time.loc[fixs_with_ident_time["is_immediately_preceding"]]
preceding_fixs_dist_summary = (
    pd.concat([
        preceding_fixs["distance_dva"].describe(percentiles).rename("all"),
        preceding_fixs.groupby("subject")["distance_dva"].describe(percentiles).T,
    ], axis=1)
).T

print("Preceding Identification Fixations:")
print(f"When subjects fixated on a target and identified it immediately after the fixation, {preceding_fixs_dist_summary.loc['all', '95%']:.2f} DVA of distance covers 95% of the cases.")
preceding_fixs_dist_summary

Preceding Identification Fixations:
When subjects fixated on a target and identified it immediately after the fixation, 3.80 DVA of distance covers 95% of the cases.


Unnamed: 0,count,mean,std,min,5%,25%,50%,75%,90%,95%,max
all,2003.0,1.283442,1.736633,0.007757,0.209088,0.495278,0.859482,1.40597,2.299841,3.798692,19.686305
2,165.0,1.522955,1.899072,0.038814,0.199154,0.497136,0.869727,1.714575,4.111322,5.627576,10.295021
12,133.0,1.534406,1.712427,0.028722,0.347126,0.596772,1.033246,1.668116,2.593659,4.868113,9.493544
13,170.0,0.94122,0.639363,0.057448,0.247683,0.523218,0.813986,1.232215,1.59986,1.973387,5.284142
14,170.0,1.601367,2.516978,0.039167,0.252464,0.49604,0.873392,1.704897,3.3673,4.496134,16.502108
15,159.0,1.782876,2.84596,0.076795,0.222818,0.540849,0.883613,1.885028,3.485988,5.057811,16.967226
16,172.0,1.495051,2.322052,0.09216,0.20342,0.547694,0.926373,1.464096,2.931335,4.588137,19.686305
17,166.0,1.801119,1.980107,0.1217,0.319646,0.700139,1.21983,1.950061,3.86579,7.422178,10.451534
18,231.0,0.883785,0.758658,0.036474,0.143061,0.398401,0.704171,1.033111,1.735507,2.478636,4.267241
19,158.0,1.332309,1.718917,0.039494,0.241194,0.633475,1.027331,1.509759,1.909214,2.760646,14.663008


### Visualize

In [11]:
column_titles = ["All Fixations", "Co-Occurring with Identification", "Preceding Identification"]
fig = make_subplots(
    rows=2, cols=len(column_titles), column_titles=column_titles,
    shared_xaxes=True, shared_yaxes=False,
)

for c in range(len(column_titles)):
    if c == 0:
        data = fixs_with_ident_time
    elif c == 1:
        data = fixs_with_ident_time[fixs_with_ident_time["is_during"]]
    elif c == 2:
        data = fixs_with_ident_time[fixs_with_ident_time["is_immediately_preceding"]]
    else:
        raise ValueError(f"Unexpected column index {c}.")
    # top: distribution across all subjects
    fig.add_trace(
        row=1, col=c+1, trace=go.Violin(
            y0="distance", x=data["distance_dva"],
            name="All Subjects", legendgroup="All Subjects",
            text=data.apply(
                lambda row: f"Subject: {row['subject']}<br>"
                            f"Trial: {row['trial']}<br>"
                            f"Target: {row['target']}<br>"
                            f"Distance: {row['distance_dva']:.2f} DVA",
                axis=1
            ),
            marker=dict(color=cnfg.get_discrete_color("all")),
            width=1.75, orientation="h", side="positive", spanmode='hard',
            box=dict(visible=False),
            meanline=dict(visible=True),
            points="all", pointpos=-0.5,
            showlegend=c==0, hoverinfo="x+y+text",

        )
    )
    # bottom: distribution per subject
    for subj_id in data[cnfg.SUBJECT_STR].unique():
        subj_string = f"{cnfg.SUBJECT_STR.capitalize()} {subj_id:02d}"
        subj_data = data[data[cnfg.SUBJECT_STR] == subj_id]
        texts = subj_data.apply(
            lambda row: f"{subj_string}<br>"
                        f"Trial: {row['trial']}<br>"
                        f"Target: {row['target']}<br>"
                        f"Distance: {row['distance_dva']:.2f} DVA",
            axis=1
        )
        fig.add_trace(
            row=2, col=c+1, trace=go.Violin(
                y0="distance", x=subj_data["distance_dva"],
                text=texts,
                name=subj_string, legendgroup=subj_string,
                marker=dict(color=cnfg.get_discrete_color(subj_id, loop=True), opacity=0.5),
                width=1.75, orientation="h", side="positive", spanmode='hard',
                box=dict(visible=False),
                meanline=dict(visible=True),
                points="all", pointpos=-0.5,
                showlegend=c==0, hoverinfo="x+y+text"
            )
        )

# update visuals
fig.update_annotations(font=cnfg.AXIS_LABEL_FONT)
fig.update_yaxes(showticklabels=False)  # Hide y-axis labels
fig.update_xaxes(
    title=None, showline=False,
    showgrid=True, gridcolor=cnfg.GRID_LINE_COLOR, gridwidth=cnfg.GRID_LINE_WIDTH,
    zeroline=False, zerolinecolor=cnfg.GRID_LINE_COLOR, zerolinewidth=cnfg.ZERO_LINE_WIDTH,
    tickfont=cnfg.AXIS_TICK_FONT,
)
fig.update_layout(
    width=1400, height=650,
    title=dict(text="Distance on Fixations", font=cnfg.TITLE_FONT),
    paper_bgcolor='rgba(0, 0, 0, 0)',
    # plot_bgcolor='rgba(0, 0, 0, 0)',
    showlegend=True,
)

fig.show()