In [8]:
import pandas as pd
import glob
import numpy as np
import copy
import matplotlib.pyplot as plt

In [9]:
# --- Column names shared by the cells below ---

# Columns selected from each recording when it is loaded.
time_col = "TIME"
position_cols = ["BPOGX", "BPOGY"]
reading_col = "FPOGID"
valid_col = "BPOGV"  # valid flag

# Fixation-related column names.
fixation_col = "Fixation"
fixation_id_col = "Fixation_ID"

# Columns added by this notebook: scaled positions and movement metrics.
abs_position_cols = ["ABS_X", "ABS_Y"]  # new position columns
periods_col = "Periods"
distances_col = "Distances"
velocity_col = "Velocity"

In [10]:
# Pixel dimensions used to scale the position columns into absolute
# coordinates (presumably the screen resolution of the recording setup).
pixel_width, pixel_height = 1920, 1080

# Per-axis scale factors, ordered like the position columns: x first, then y.
params = [pixel_width, pixel_height]

In [11]:
# Load every text-reading gaze recording into `dfs`, keyed by its file path,
# keeping only the position, reading-id, time and valid-flag columns.
dfs = {}
# sorted(): glob returns matches in arbitrary, OS-dependent order; sorting keeps
# the dict (and everything concatenated from it later) reproducible.
for file in sorted(glob.glob("data/*/*text*.csv")):
    df = pd.read_csv(file)
    # The fifth CSV column is renamed to `time_col`. A fresh list of names is
    # assigned instead of writing into `df.columns.values`, which would mutate
    # the backing array of the Index in place.
    df.columns = [time_col if i == 4 else name for i, name in enumerate(df.columns)]
    # .copy() makes each entry an independent frame, so the column assignments
    # below do not write into a slice of `df` (SettingWithCopyWarning).
    dfs[file] = df[position_cols + [reading_col, time_col, valid_col]].copy()
    dfs[file].index.name = "Index"
    print(dfs[file].index.name)

# Scale each position column by its pixel dimension to get the absolute columns.
for filename, df in dfs.items():
    print(filename)
    for a, r, p in zip(abs_position_cols, position_cols, params):
        df[a] = df[r] * p

Index
Index
Index
Index
Index
Index
Index
Index
Index
Index
data\Alex\Alex_text1_all_gaze_labeled.csv
data\Alex\Alex_text2_all_gaze_labeled.csv
data\Diana\Diana_text1_all_gaze_labeled.csv
data\Diana\Diana_text2_all_gaze_labeled.csv
data\Misha\Misha_text1_all_gaze_labeled.csv
data\Misha\Misha_text2_all_gaze_labeled.csv
data\Polina\Polina_text1_all_gaze_labeled.csv
data\Polina\Polina_text2_all_gaze_labeled.csv
data\Valik\Valik_text1_all_gaze_labeled.csv
data\Valik\Valik_text2_all_gaze_labeled.csv


In [12]:
# Tag every recording with its file path as an extra 'Name' index level, then
# stack all recordings into one frame indexed by (Name, Index).
for k in dfs:
    # The guard makes this cell safe to re-run: without it a second run would
    # append a second 'Name' level and the reorder_levels call below would no
    # longer match the index.
    if 'Name' not in dfs[k].index.names:
        # assign() + set_index() build a new frame instead of mutating in
        # place, so no column is written onto a possible slice.
        dfs[k] = dfs[k].assign(Name=k).set_index('Name', append=True)

data = pd.concat(dfs.values())
# set_index(append=True) yields (Index, Name); put the recording name first.
data = data.reorder_levels(['Name','Index'])

In [20]:
# Per recording, count how many samples carry each value of the valid flag.
(
    data
    .groupby(['Name'])[valid_col]
    .value_counts()
)

Name                                           BPOGV
data\Alex\Alex_text1_all_gaze_labeled.csv      1        14387
                                               0          974
data\Alex\Alex_text2_all_gaze_labeled.csv      1         6803
                                               0          534
data\Diana\Diana_text1_all_gaze_labeled.csv    1        10094
                                               0         1013
data\Diana\Diana_text2_all_gaze_labeled.csv    1         5558
                                               0          457
data\Misha\Misha_text1_all_gaze_labeled.csv    1        12754
                                               0         2390
data\Misha\Misha_text2_all_gaze_labeled.csv    1         6576
                                               0          648
data\Polina\Polina_text1_all_gaze_labeled.csv  1        13228
                                               0          568
data\Polina\Polina_text2_all_gaze_labeled.csv  1         4581
                 

In [21]:
# Keep only the samples whose valid flag equals 1 ...
data_filtered = data.loc[data[valid_col].eq(1)]

# ... and re-count per recording to confirm that only flag value 1 remains.
(
    data_filtered
    .groupby(['Name'])[valid_col]
    .value_counts()
)

Name                                           BPOGV
data\Alex\Alex_text1_all_gaze_labeled.csv      1        14387
data\Alex\Alex_text2_all_gaze_labeled.csv      1         6803
data\Diana\Diana_text1_all_gaze_labeled.csv    1        10094
data\Diana\Diana_text2_all_gaze_labeled.csv    1         5558
data\Misha\Misha_text1_all_gaze_labeled.csv    1        12754
data\Misha\Misha_text2_all_gaze_labeled.csv    1         6576
data\Polina\Polina_text1_all_gaze_labeled.csv  1        13228
data\Polina\Polina_text2_all_gaze_labeled.csv  1         4581
data\Valik\Valik_text1_all_gaze_labeled.csv    1        11228
data\Valik\Valik_text2_all_gaze_labeled.csv    1         5603
Name: BPOGV, dtype: int64

<h3>Calculate distances between points and velocity</h3>

In [None]:
# For every recording in `dfs`, add: the per-axis displacement to the next
# sample, the time step to the next sample, the Euclidean distance covered and
# the resulting velocity (distance / time step).
diff_cols = ['DIFF_'+c for c in abs_position_cols]
# list(): snapshot the items because the dict values are replaced in the loop.
for filename, df in list(dfs.items()):
    for d, c in zip(diff_cols, abs_position_cols):
        # diff() is current - previous; shift(-1) re-aligns it to next - current.
        df[d] = df[c].diff().shift(-1)
    df[periods_col] = df[time_col].diff().shift(-1)
    # NOTE(review): sum(axis=1) skips NaN, so the last row (no successor) gets
    # distance 0.0 rather than NaN, while its velocity is NaN -- confirm this
    # is the intended handling of the final sample.
    df[distances_col] = np.square(df[diff_cols]).sum(axis=1).pow(1./2)
    df[velocity_col] = df[distances_col] / df[periods_col]
    # Forward-fill the remaining NaNs and store the result back in `dfs`.
    # The original assigned the filled frame to the loop variable only, so the
    # fill was silently discarded. ffill() also replaces the deprecated
    # fillna(method='ffill') spelling.
    dfs[filename] = df.ffill()