In [1]:
import numpy as np
import pandas as pd
import plotly.io as pio

import config as cnfg

pio.renderers.default = "notebook"      # or "browser"

In [2]:
from preprocess.read_data import read_saved_data
_targets, actions, metadata, idents, _fixations, _visits = read_saved_data(cnfg.OUTPUT_PATH)

### Gaze Coverage
Find subjects with many trials with gaze coverage below a `80%` threshold.

In [77]:
GC_THRESHOLD = 80
has_low_coverage = (
    metadata
    .copy()
    .assign(low_coverage=lambda df: df["gaze_coverage"] < GC_THRESHOLD)
    .set_index(["subject", "trial"])["low_coverage"]
)
num_low_coverage = has_low_coverage.groupby("subject").sum()
print(num_low_coverage.to_dict())

has_low_coverage

{2: 0, 12: 0, 13: 0, 14: 0, 15: 0, 16: 0, 17: 0, 18: 0, 19: 0, 20: 0, 21: 0, 22: 0}


subject  trial
2        1        False
         2        False
         3        False
         4        False
         5        False
                  ...  
22       56       False
         57       False
         58       False
         59       False
         60       False
Name: low_coverage, Length: 720, dtype: bool

### No-Action Trials
Find subjects with many trials with no recorded actions.

In [74]:
has_actions = (
    metadata[["subject", "trial"]]
    .copy()
    .assign(
        has_actions=lambda df: df.apply(
            lambda row: (row["subject"], row["trial"]) in actions[["subject", "trial"]].itertuples(index=False),
            axis=1,
        )
    )
    .set_index(["subject", "trial"])["has_actions"]
)
no_actions = (~has_actions).rename("no_actions")
num_no_actions = no_actions.groupby("subject").sum()
print(num_no_actions.to_dict())

no_actions

{2: 1, 12: 1, 13: 3, 14: 3, 15: 6, 16: 4, 17: 6, 18: 0, 19: 1, 20: 7, 21: 4, 22: 6}


subject  trial
2        1        False
         2        False
         3        False
         4        False
         5        False
                  ...  
22       56        True
         57       False
         58       False
         59       False
         60       False
Name: no_actions, Length: 720, dtype: bool

### Bad Action Trials
Find subjects with many trials with bad actions (actions like "mark and reject a target", etc.).

#### (a) exclusion criteria = all actions except "mark and confirm"

In [89]:
bad_actions = actions[np.isin(actions["action"], bad_actions)]

# metadata[["subject", "trial"]].copy().assign(
#     has_bad_actions=lambda df: df.apply(
#         lambda row: row,
#         axis=1
#     )
# )

Unnamed: 0,subject,trial,time,action,to_trial_end
71,2,41,1586,4,11276
75,2,42,18961,4,2552
83,2,47,4809,4,16624
88,2,48,10827,4,11759
89,2,48,12547,4,10039
...,...,...,...,...,...
1189,20,58,2797,4,19196
1279,21,50,2646,4,19467
1289,21,54,8098,4,13910
1332,22,17,8632,4,13436


In [80]:
from data_models.LWSEnums import SubjectActionCategoryEnum

bad_actions = [act for act in SubjectActionCategoryEnum if act != SubjectActionCategoryEnum.MARK_AND_CONFIRM]

has_bad_actions = (
    actions
    .groupby(["subject", "trial"])["action"]
    .apply(lambda acts: np.isin(acts, bad_actions))
    .map(any)
    .rename("exclude_by_bad_actions")
)
num_bad_actions = has_bad_actions.groupby("subject").sum()
print(num_bad_actions.to_dict())

has_bad_actions

{2: 8, 12: 8, 13: 7, 14: 5, 15: 5, 16: 3, 17: 8, 18: 12, 19: 24, 20: 2, 21: 2, 22: 2}


subject  trial
2        1        False
         2        False
         3        False
         4        False
         5        False
                  ...  
22       55       False
         57       False
         58       False
         59       False
         60       False
Name: exclude_by_bad_actions, Length: 678, dtype: bool

#### (b) exclusion criteria = only "mark and reject" actions - TOO LENIENT

In [81]:
bad_actions = [SubjectActionCategoryEnum.MARK_AND_REJECT]

has_bad_actions2 = (
    actions
    .groupby(["subject", "trial"])["action"]
    .apply(lambda acts: np.isin(acts, bad_actions))
    .map(any)
    .rename("exclude_by_bad_actions")
)
num_bad_actions2 = has_bad_actions2.groupby("subject").sum()
# print(num_bad_actions2.to_dict())
# has_bad_actions2

{2: 7, 12: 8, 13: 7, 14: 5, 15: 5, 16: 3, 17: 8, 18: 12, 19: 24, 20: 2, 21: 2, 22: 2}


subject  trial
2        1        False
         2        False
         3        False
         4        False
         5        False
                  ...  
22       55       False
         57       False
         58       False
         59       False
         60       False
Name: exclude_by_bad_actions, Length: 678, dtype: bool

### False Alarms
Find subjects with many trials with false alarm identifications.

In [83]:
from helpers.sdt import calc_sdt_class_per_trial

false_alarms = calc_sdt_class_per_trial(metadata, idents, "false_alarm")
has_false_alarms = (false_alarms["count"] > 0).rename("exclude_by_false_alarms")
has_false_alarms = (
    pd.concat([false_alarms[["subject", "trial"]], has_false_alarms], axis=1)
    .set_index(["subject", "trial"])["exclude_by_false_alarms"]
)
num_false_alarms = has_false_alarms.groupby("subject").sum()
print(num_false_alarms.to_dict())

has_false_alarms

{2: 24, 12: 29, 13: 10, 14: 7, 15: 14, 16: 0, 17: 14, 18: 2, 19: 28, 20: 15, 21: 4, 22: 24}


subject  trial
2        1        False
         2        False
         3        False
         4         True
         5        False
                  ...  
22       56       False
         57       False
         58        True
         59        True
         60       False
Name: exclude_by_false_alarms, Length: 720, dtype: bool

### Summary of Exclusion Criteria
Combine all exclusion criteria to identify subjects with many trials to be excluded.

In [88]:
exclusion_criteria = pd.concat([has_low_coverage, no_actions, has_bad_actions, has_false_alarms,], axis=1)
exclusion_criteria["to_exclude"] = exclusion_criteria.any(axis=1)

exclusion_summary = exclusion_criteria.groupby("subject")["to_exclude"].sum()
print(exclusion_summary.to_dict())

exclusion_criteria

{2: 32, 12: 32, 13: 18, 14: 14, 15: 22, 16: 7, 17: 24, 18: 13, 19: 38, 20: 24, 21: 10, 22: 31}


Unnamed: 0_level_0,Unnamed: 1_level_0,low_coverage,no_actions,exclude_by_bad_actions,exclude_by_false_alarms,to_exclude
subject,trial,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2,1,False,False,False,False,False
2,2,False,False,False,False,False
2,3,False,False,False,False,False
2,4,False,False,False,True,True
2,5,False,False,False,False,False
...,...,...,...,...,...,...
22,56,False,True,,False,True
22,57,False,False,False,False,False
22,58,False,False,False,True,True
22,59,False,False,False,True,True
