# Getting started with gaze dataset 

In this example, we will show examples of the main functionalities included in the emtk.

This includes:
- Parsing raw data from the gaze dataset to pandas dataframe.
- Visualizing raw data, filtered fixation, saccades, and AOIs visualization.
- Applying fixation correction through setting offset.
- Generating AOIs for any gaze stimuli.
- Adding text tokens to generated AOIs.
- Adding srcML tags to AOIs and tokens.
- Performing hit test between fixations and AOIs. 

In [22]:
%load_ext autoreload
%autoreload 2  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Importing the tool

In [23]:
import pandas as pd
from g2c import parsers, visualization, util, aoi

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Parsing raw data from the UNL dataset

In [6]:
eye_events, samples = parsers.UNL_UM(sample_size = 50)

Processing files:   0%|[32m          [0m| 0/48 [00:00<?, ?file/s]

Processing files:   2%|[32m▏         [0m| 1/48 [00:02<01:38,  2.09s/file]

Processed file: UNL-UM_UO P103.tsv


Processing files:   4%|[32m▍         [0m| 2/48 [00:03<01:28,  1.93s/file]

Processed file: UNL-UM_UO P105.tsv


Processing files:   6%|[32m▋         [0m| 3/48 [00:06<01:46,  2.36s/file]

Processed file: UNL-UM_UO P107.tsv


Processing files:   8%|[32m▊         [0m| 4/48 [00:09<01:58,  2.70s/file]

Processed file: UNL-UM_UO P113.tsv


Processing files:  10%|[32m█         [0m| 5/48 [00:14<02:18,  3.23s/file]

Processed file: UNL-UM_UO P124.tsv


Processing files:  12%|[32m█▎        [0m| 6/48 [00:19<02:39,  3.80s/file]

Processed file: UNL-UM_UO P127.tsv


Processing files:  15%|[32m█▍        [0m| 7/48 [00:24<02:53,  4.23s/file]

Processed file: UNL-UM_UO P132.tsv


Processing files:  17%|[32m█▋        [0m| 8/48 [00:31<03:24,  5.12s/file]

Processed file: UNL-UM_UO P135.tsv


Processing files:  19%|[32m█▉        [0m| 9/48 [00:38<03:49,  5.88s/file]

Processed file: UNL-UM_UO P139.tsv


Processing files:  21%|[32m██        [0m| 10/48 [00:45<03:57,  6.25s/file]

Processed file: UNL-UM_UO P140.tsv


Processing files:  23%|[32m██▎       [0m| 11/48 [00:53<04:12,  6.81s/file]

Processed file: UNL-UM_UO P142.tsv


Processing files:  25%|[32m██▌       [0m| 12/48 [01:02<04:25,  7.38s/file]

Processed file: UNL-UM_UO P143.tsv


Processing files:  27%|[32m██▋       [0m| 13/48 [01:11<04:35,  7.86s/file]

Processed file: UNL-UM_UO P147.tsv


Processing files:  29%|[32m██▉       [0m| 14/48 [01:22<04:57,  8.76s/file]

Processed file: UNL-UM_UO P149.tsv


Processing files:  31%|[32m███▏      [0m| 15/48 [01:32<05:05,  9.26s/file]

Processed file: UNL-UM_UO P152.tsv


Processing files:  33%|[32m███▎      [0m| 16/48 [01:43<05:09,  9.68s/file]

Processed file: UNL-UM_UO P154.tsv


Processing files:  35%|[32m███▌      [0m| 17/48 [01:54<05:13, 10.12s/file]

Processed file: UNL-UM_UO P155.tsv


Processing files:  38%|[32m███▊      [0m| 18/48 [02:07<05:24, 10.81s/file]

Processed file: UNL-UM_UO P157.tsv


Processing files:  40%|[32m███▉      [0m| 19/48 [02:20<05:32, 11.48s/file]

Processed file: UNL-UM_UO P160.tsv


Processing files:  42%|[32m████▏     [0m| 20/48 [02:33<05:34, 11.94s/file]

Processed file: UNL-UM_UO P170.tsv


Processing files:  44%|[32m████▍     [0m| 21/48 [02:47<05:40, 12.61s/file]

Processed file: UNL-UM_UO P174.tsv


Processing files:  46%|[32m████▌     [0m| 22/48 [03:00<05:35, 12.92s/file]

Processed file: UNL-UM_UO P181.tsv


Processing files:  48%|[32m████▊     [0m| 23/48 [03:15<05:35, 13.41s/file]

Processed file: UNL-UM_UO P183.tsv


Processing files:  50%|[32m█████     [0m| 24/48 [03:29<05:29, 13.73s/file]

Processed file: UNL-UM_UO P192.tsv


Processing files:  52%|[32m█████▏    [0m| 25/48 [03:44<05:24, 14.10s/file]

Processed file: UNL-UM_UO P194.tsv


Processing files:  54%|[32m█████▍    [0m| 26/48 [04:00<05:22, 14.65s/file]

Processed file: UNL-UM_UO P195.tsv


Processing files:  56%|[32m█████▋    [0m| 27/48 [04:17<05:19, 15.23s/file]

Processed file: UNL-UM_UO P196.tsv


Processing files:  58%|[32m█████▊    [0m| 28/48 [04:33<05:10, 15.53s/file]

Processed file: UNL-UM_UO P198.tsv


Processing files:  60%|[32m██████    [0m| 29/48 [04:50<05:02, 15.94s/file]

Processed file: UNL-UM_UO P202.tsv


Processing files:  62%|[32m██████▎   [0m| 30/48 [05:07<04:53, 16.33s/file]

Processed file: UNL-UM_UO P220.tsv


Processing files:  65%|[32m██████▍   [0m| 31/48 [05:25<04:46, 16.83s/file]

Processed file: UNL-UM_UO P223.tsv


Processing files:  67%|[32m██████▋   [0m| 32/48 [05:44<04:36, 17.25s/file]

Processed file: UNL-UM_UO P224.tsv


Processing files:  69%|[32m██████▉   [0m| 33/48 [06:02<04:25, 17.72s/file]

Processed file: UNL-UM_UO P225.tsv


Processing files:  71%|[32m███████   [0m| 34/48 [06:22<04:17, 18.39s/file]

Processed file: UNL-UM_UO P231.tsv


Processing files:  73%|[32m███████▎  [0m| 35/48 [06:41<04:01, 18.59s/file]

Processed file: UNL-UM_UO P243.tsv


Processing files:  75%|[32m███████▌  [0m| 36/48 [07:01<03:48, 19.03s/file]

Processed file: UNL-UM_UO P246.tsv


Processing files:  77%|[32m███████▋  [0m| 37/48 [07:22<03:33, 19.42s/file]

Processed file: UNL-UM_UO P247.tsv


Processing files:  79%|[32m███████▉  [0m| 38/48 [07:44<03:22, 20.24s/file]

Processed file: UNL-UM_UO P248.tsv


Processing files:  81%|[32m████████▏ [0m| 39/48 [08:07<03:09, 21.11s/file]

Processed file: UNL-UM_UO P250.tsv


Processing files:  83%|[32m████████▎ [0m| 40/48 [08:30<02:52, 21.58s/file]

Processed file: UNL-UM_UO P252.tsv


Processing files:  85%|[32m████████▌ [0m| 41/48 [08:53<02:33, 21.98s/file]

Processed file: UNL-UM_UO P253.tsv


Processing files:  88%|[32m████████▊ [0m| 42/48 [09:17<02:16, 22.68s/file]

Processed file: UNL-UM_UO P254.tsv


Processing files:  90%|[32m████████▉ [0m| 43/48 [09:41<01:54, 22.95s/file]

Processed file: UNL-UM_UO P255.tsv


Processing files:  92%|[32m█████████▏[0m| 44/48 [10:05<01:33, 23.46s/file]

Processed file: UNL-UM_UO P256.tsv


Processing files:  94%|[32m█████████▍[0m| 45/48 [10:30<01:11, 23.97s/file]

Processed file: UNL-UM_UO P257.tsv


Processing files:  96%|[32m█████████▌[0m| 46/48 [10:56<00:49, 24.53s/file]

Processed file: UNL-UM_UO P260.tsv


Processing files:  98%|[32m█████████▊[0m| 47/48 [11:23<00:25, 25.29s/file]

Processed file: UNL-UM_UO P262.tsv


Processing files: 100%|[32m██████████[0m| 48/48 [11:51<00:00, 14.82s/file]

Processed file: UNL-UM_UO P266.tsv





In [8]:
samples.head()

Unnamed: 0,eye_tracker,experiment_id,participant_id,filename,trial_id,stimuli_module,stimuli_name,Recording timestamp,Project name,Export date,...,Presented Media name,Presented Media width,Presented Media height,Presented Media position X (DACSpx),Presented Media position Y (DACSpx),Original Media width,Original Media height,Eye movement type,Gaze event duration,Eye movement type index
0,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237868,UNL-UM_UO,2025/3/7,...,,,,,,,,Fixation,567.0,413.0
1,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237875,UNL-UM_UO,2025/3/7,...,ProLabQs Recording40.mp4,1920.0,1080.0,0.0,0.0,1920.0,1080.0,Saccade,42.0,655.0
2,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237882,UNL-UM_UO,2025/3/7,...,,,,,,,,Saccade,42.0,655.0
3,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237883,UNL-UM_UO,2025/3/7,...,ProLabQs Recording40.mp4,1920.0,1080.0,0.0,0.0,1920.0,1080.0,Saccade,42.0,655.0
4,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237891,UNL-UM_UO,2025/3/7,...,ProLabQs Recording40.mp4,1920.0,1080.0,0.0,0.0,1920.0,1080.0,Saccade,42.0,655.0


In [9]:
eye_events.head()

Unnamed: 0,eye_tracker,experiment_id,participant_id,filename,trial_id,stimuli_module,stimuli_name,timestamp,duration,x0,y0,x1,y1,token,pupil_l,pupil_r,amplitude,peak_velocity,eye_event_type
0,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237868,567.0,718.0,346.0,,,,,,,,fixation
1,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237916,192.0,543.0,342.0,,,,3.494,3.425,,,fixation
2,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237925,192.0,543.0,342.0,,,,3.491,3.422,,,fixation
3,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237933,192.0,543.0,342.0,,,,3.489,3.419,,,fixation
4,Tobii I-VT (Fixation),17623623_P103,17623623_P103,UNL-UM_UO P103.tsv,Q5,datasets/UNL_UM24_30July/stimuli,Q5 (localhost).png,237941,192.0,543.0,342.0,,,,3.485,3.421,,,fixation


In [None]:
# save the data
# samples.to_csv('samples.csv', index = False)
# eye_events.to_csv('eye_events.csv', index = False)

In [10]:
unique_trials = samples['trial_id'].unique()
unique_trials

array(['Q5', 'Q1', 'Q2A', 'Q2B', 'Q4A', 'Q4B', 'Q3'], dtype=object)

In [11]:
unique_experiment_ids_samples = samples['experiment_id'].unique()
unique_experiment_ids_samples

array(['17623623_P103', '75894071_P105', '73142188_P107', '95297148_P113',
       '26355477_P124', '28464710_P127', '7792871_P132', '50508067_P135',
       '98138536_P139', '67006224_P140', '57466117_P142', '48914592_P143',
       '50725150_P147', '87467712_P149', '19942717_P152', '53504980_P154',
       '68288415_P155', '8953417_P157', '93748365_P160', '35630584_P170',
       '25899018_P174', '14117440_P181', '71261375_P183', '66742041_P192',
       '67744331_P194', '31424792_P195', '45622102_P196', '6103787_P198',
       '37714676_P202', '56897660_P220', '61922052_P223', '92561849_P224',
       '87020395_P225', '86625353_P231', '67020883_P243', '65560620_P246',
       '17281517_P247', '15631446_P248', '67306020_P250', '22713769_P252',
       '15120427_P253', '79664715_P254', '37314099_P255', '52450561_P256',
       '78499485_P257', '19050980_P260', '2128416_P262', '82886310_P266'],
      dtype=object)

In [12]:
unique_experiment_ids_eye_events = eye_events["experiment_id"].unique()
unique_experiment_ids_eye_events            

array(['17623623_P103', '75894071_P105', '73142188_P107', '95297148_P113',
       '26355477_P124', '28464710_P127', '7792871_P132', '50508067_P135',
       '98138536_P139', '67006224_P140', '57466117_P142', '48914592_P143',
       '50725150_P147', '87467712_P149', '19942717_P152', '53504980_P154',
       '68288415_P155', '8953417_P157', '93748365_P160', '35630584_P170',
       '25899018_P174', '14117440_P181', '71261375_P183', '66742041_P192',
       '67744331_P194', '31424792_P195', '45622102_P196', '6103787_P198',
       '37714676_P202', '56897660_P220', '61922052_P223', '92561849_P224',
       '87020395_P225', '86625353_P231', '67020883_P243', '65560620_P246',
       '17281517_P247', '15631446_P248', '67306020_P250', '22713769_P252',
       '15120427_P253', '79664715_P254', '37314099_P255', '52450561_P256',
       '78499485_P257', '19050980_P260', '2128416_P262', '82886310_P266'],
      dtype=object)

# Fixation Extraction

### Extract one individual participant

In [16]:
# difine the experiment_id and trial_id
experiment_id = "28464710_P127"
trial_id = 'Q1'

In [17]:
# get the events samples for the experiment_id and trial_id
trial_data = eye_events.loc[(eye_events['experiment_id'] == experiment_id) & 
                            (eye_events['trial_id'] == trial_id)]

trial_data.head()

Unnamed: 0,eye_tracker,experiment_id,participant_id,filename,trial_id,stimuli_module,stimuli_name,timestamp,duration,x0,y0,x1,y1,token,pupil_l,pupil_r,amplitude,peak_velocity,eye_event_type
359670,Tobii I-VT (Fixation),28464710_P127,28464710_P127,UNL-UM_UO P127.tsv,Q1,datasets/UNL_UM24_30July/stimuli,Q1 (localhost).png,527928,258.0,951.0,518.0,,,,,,,,fixation
359671,Tobii I-VT (Fixation),28464710_P127,28464710_P127,UNL-UM_UO P127.tsv,Q1,datasets/UNL_UM24_30July/stimuli,Q1 (localhost).png,527931,258.0,951.0,518.0,,,,2.779,2.908,,,fixation
359672,Tobii I-VT (Fixation),28464710_P127,28464710_P127,UNL-UM_UO P127.tsv,Q1,datasets/UNL_UM24_30July/stimuli,Q1 (localhost).png,527939,258.0,951.0,518.0,,,,2.771,2.916,,,fixation
359673,Tobii I-VT (Fixation),28464710_P127,28464710_P127,UNL-UM_UO P127.tsv,Q1,datasets/UNL_UM24_30July/stimuli,Q1 (localhost).png,527944,258.0,951.0,518.0,,,,,,,,fixation
359674,Tobii I-VT (Fixation),28464710_P127,28464710_P127,UNL-UM_UO P127.tsv,Q1,datasets/UNL_UM24_30July/stimuli,Q1 (localhost).png,527948,258.0,951.0,518.0,,,,2.769,2.913,,,fixation


In [18]:
# save the data
trial_data.to_csv(f'output/unl_um/individual/fixations/{trial_id}/fixations_{trial_id}_{experiment_id}.csv', index = False)

OSError: Cannot save file into a non-existent directory: 'output\unl_um\individual\fixations\Q1'

### Extract all the fixations for each participant by trial id

In [19]:
# get the samples for the experiment_id and trial_id
experiment_range = pd.Series(unique_experiment_ids_eye_events)
# get the samples for the experiment_id and trial_id
trial_range = pd.Series(unique_trials)
experiment_range
trial_range

0     Q5
1     Q1
2    Q2A
3    Q2B
4    Q4A
5    Q4B
6     Q3
dtype: object

### Batch Export each individual fixations one by one 

In [20]:
# file path
file_path = f'output/unl_um-30July/individual/fixations'

In [15]:
util.export_fixations(eye_events, samples, experiment_range, trial_range, file_path)

Processing experiment_id: 100%|[32m██████████[0m| 48/48 [03:32<00:00,  4.43s/it]

completed





### Extract Fixations for tasks by trial range

In [12]:
# file path
file_path = f'output/unl_um-30July/group/fixations'

In [13]:
util.export_fixations(eye_events, samples, experiment_range, trial_range, file_path, bytask=True)

Processing trial_id: 100%|[32m██████████[0m| 7/7 [00:21<00:00,  3.09s/it]

completed





### Extract Group by all combinatioin

In [16]:
file_path = f'output/unl_um/all/fixations'

In [17]:
util.export_fixations(eye_events, samples, experiment_range, trial_range, 
                      file_path, byall=True)

Processing eye events: 100%|██████████| 3819687/3819687 [00:10<00:00, 368548.43it/s]


Export completed successfully.
