In [1]:
import scipy.io
import csv
import numpy as np
import os
from tqdm import tqdm

This file is similar to the data_processing_gazecom.ipynb file as we precompute the windows for the hollywood dataset. The differences are due to different file format and that wie do not perform data augmentation.

In [2]:
def load_data(path):
    for root, dirs, files in os.walk(path):
        
        if files:
            for file in files:
                data_set = []
                if file.lower().endswith('.mat'):
                    mat = scipy.io.loadmat(root + '/' + file)
                    data = mat['ETdata']['pos'][0][0]
                    x_res = mat['ETdata']['screenRes'][0][0][0][0]
                    y_res = mat['ETdata']['screenRes'][0][0][0][1]
                    i = 0
                    while i < len(data):
                        norm_x = float(data[i][3])/x_res
                        norm_y = float(data[i][4])/y_res
                        data_set.append([norm_x,norm_y,data[i][5]])
                        #skip one sample to downsample from 500hz to 250hz
                        i += 2

                    samples, labels = process_data(data_set, window_size=251)
                    samples = np.asarray(samples)
                    labels = np.asarray(labels)
                    
                    one_hot_labels = []
                    for label in labels:
                        one_hot = one_hot_encode(int(label[0]))
                        one_hot_labels.append(one_hot)
                    np.save('/bigpool/strohmfn/lund2013_processed/' + file + "_data.npy", samples)
                    np.save('/bigpool/strohmfn/lund2013_processed/' + file + "_labels.npy", one_hot_labels)
    return data_set

In [3]:
def one_hot_encode(label):
    encoded_label = np.zeros(4)
    encoded_label[label] = 1
    return encoded_label

In [4]:
def create_training_sample(data_set, window_size, center_index):
    sample = []
    for i in range(center_index-int(window_size/2), center_index+int(window_size/2)+1):
        d = data_set[i][0:2]
        sample = sample + [d]
    return sample

In [5]:
def map_label(label):
    if int(label) == 1:
        #fixation
        return 0
    elif int(label) == 2:
        #saccade
        return 1
    elif int(label) == 3:
        #PSO -> unwanted
        return -1
    elif int(label) == 4:
        #smooth pursuit
        return 2
    elif int(label) == 5:
        #Blink -> unwanted
        return -1
    elif int(label) == 6:
        #Undefined -> unwanted
        return -1
    return -1

In [6]:
def process_data(data_set, window_size):
    samples = []
    labels = []
    
    for i in tqdm(range(int(window_size/2), len(data_set)-int(window_size/2))):
        label = map_label(data_set[i][2])
        if label != -1:
            sample = create_training_sample(data_set, window_size, i)
            samples.append(sample)
            labels.append([label])
    return samples, labels

In [7]:
load_data('/bigpool/strohmfn/lund2013/')

100%|██████████| 1773/1773 [00:01<00:00, 1528.37it/s]
100%|██████████| 1772/1772 [00:01<00:00, 1603.27it/s]
100%|██████████| 1772/1772 [00:01<00:00, 1722.67it/s]
100%|██████████| 9048/9048 [00:05<00:00, 1618.10it/s]
100%|██████████| 1160/1160 [00:00<00:00, 3151.48it/s]
100%|██████████| 17971/17971 [00:10<00:00, 1753.76it/s]
100%|██████████| 1773/1773 [00:01<00:00, 1711.45it/s]
100%|██████████| 22986/22986 [00:14<00:00, 1578.25it/s]
100%|██████████| 1161/1161 [00:00<00:00, 2948.59it/s]
100%|██████████| 1762/1762 [00:00<00:00, 2910.41it/s]
100%|██████████| 555/555 [00:00<00:00, 3083.70it/s]
100%|██████████| 1764/1764 [00:00<00:00, 1830.88it/s]
100%|██████████| 1160/1160 [00:00<00:00, 1861.53it/s]
100%|██████████| 17969/17969 [00:09<00:00, 1811.42it/s]
100%|██████████| 7040/7040 [00:04<00:00, 1700.01it/s]
100%|██████████| 1160/1160 [00:00<00:00, 2967.22it/s]
100%|██████████| 1160/1160 [00:00<00:00, 1984.83it/s]
100%|██████████| 1762/1762 [00:01<00:00, 1669.07it/s]
100%|██████████| 22986/2

[[0.48966259765625, 0.50528203125, 1.0],
 [0.490475, 0.5062662760416666, 1.0],
 [0.49050302734375, 0.5047166666666667, 1.0],
 [0.49134423828125, 0.5041889322916667, 1.0],
 [0.49026015625, 0.5035208333333333, 1.0],
 [0.49066533203125, 0.5024602864583333, 1.0],
 [0.4905533203125, 0.5022687499999999, 1.0],
 [0.49038291015625, 0.5022481770833334, 1.0],
 [0.49067470703125, 0.50173046875, 1.0],
 [0.49050283203125, 0.50193515625, 1.0],
 [0.49120908203125, 0.5042475260416667, 1.0],
 [0.49148564453125, 0.5046052083333333, 1.0],
 [0.49139951171875, 0.505348046875, 1.0],
 [0.49054267578125, 0.506470703125, 1.0],
 [0.491476953125, 0.508659375, 1.0],
 [0.4907654296875, 0.5089600260416667, 1.0],
 [0.491216015625, 0.5088397135416667, 1.0],
 [0.49092626953125, 0.5106813802083333, 1.0],
 [0.49169296875, 0.5128510416666666, 1.0],
 [0.492516015625, 0.513090234375, 1.0],
 [0.4915365234375, 0.5142453125, 1.0],
 [0.49234697265625, 0.5155110677083333, 1.0],
 [0.4926146484375, 0.5162731770833333, 1.0],
 [0.49