In [None]:
from scipy.io import arff
import numpy as np
import os
from tqdm import tqdm
import math

In this notebook we preprocess the windows of gaze data so that they can be passed directly through the network. We also define several data-augmentation functions: mirroring, reversing and rotating. Augmentation via rotation is not used for the final results presented in the paper, as it had a negative impact on performance. The gaze data of the GazeCom dataset is available in ARFF format. We load and process one ARFF file at a time and save the processed and augmented data to disk as a NumPy file, together with a separate file containing the labels.

In [None]:
def load_data(path, output_dir="/bigpool/strohmfn/gazecom_processed/train/", window_size=251):
    """Convert every directory of ARFF recordings below ``path`` into NumPy files.

    For each directory that contains at least one ``.arff`` file, every
    recording is windowed and augmented with ``process_data`` and the results
    of all recordings in that directory are written to
    ``<output_dir>/<dir name>/<dir name>.npy`` (samples) and
    ``<output_dir>/<dir name>/<dir name>_labels.npy`` (one-hot labels).

    Each recording is windowed on its own, so no window mixes rows taken from
    two different ARFF files.

    Args:
        path: Root directory that is walked recursively.
        output_dir: Directory under which one sub-directory per input
            directory is created. Defaults to the location used originally.
        window_size: Window length forwarded to ``process_data``.

    Returns:
        None. The function only writes files.
    """
    for root, _dirs, files in os.walk(path):
        # Sorted so that the order of samples in the saved arrays is reproducible;
        # os.walk gives no ordering guarantee.
        arff_files = sorted(f for f in files if f.lower().endswith('.arff'))
        if not arff_files:
            # Nothing to convert here (e.g. the root itself or a folder of other files).
            continue

        samples = []
        labels = []
        for file_name in arff_files:
            records, _meta = arff.loadarff(os.path.join(root, file_name))
            # Keep columns 1, 2 and 6 of every record. Downstream code scales the
            # first two by 1280/720 and treats the third as the class label
            # (presumably x, y and the final hand label - confirm against the ARFF header).
            recording = [[record[1], record[2], record[6]] for record in records]

            file_samples, file_labels = process_data(recording, window_size=window_size)
            samples.extend(file_samples)
            labels.extend(file_labels)

        if not samples:
            # Every recording was shorter than one window; do not write empty arrays.
            continue

        one_hot_labels = [one_hot_encode(int(label[0])) for label in labels]

        # normpath drops a trailing separator, which would otherwise make basename empty.
        name = os.path.basename(os.path.normpath(root))
        target_dir = os.path.join(output_dir, name)
        # exist_ok lets the notebook be re-run without failing on existing folders.
        os.makedirs(target_dir, exist_ok=True)
        np.save(os.path.join(target_dir, name + ".npy"), np.asarray(samples))
        np.save(os.path.join(target_dir, name + "_labels.npy"), np.asarray(one_hot_labels))
            

In [None]:
def one_hot_encode(label):
    """Return a length-4 float vector with a single 1 at position ``label - 1``.

    Because the position is ``label - 1``, label 0 resolves to index -1 and
    therefore yields the same vector as label 4. Positions outside the vector
    raise ``IndexError``.
    """
    # Row ``label - 1`` of the 4x4 identity matrix is exactly the one-hot vector;
    # copy() hands back an independent array rather than a view of the matrix.
    return np.eye(4)[label - 1].copy()

In [None]:
def create_training_sample(data_set, window_size, center_index):
    """Build one normalised window of points centred on ``center_index``.

    Rows ``center_index - h`` to ``center_index + h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in ascending order. The first value
    of each row is divided by 1280 and the second by 720 (presumably the
    stimulus width and height in pixels - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``; negative indices
            are not rejected and would wrap around.

    Returns:
        A list of ``[x, y]`` lists.
    """
    half = int(window_size / 2)
    # Fresh [x, y] lists are built instead of assigning into a slice of the row:
    # this never touches data_set (NumPy row slices are views) and also accepts
    # immutable rows such as tuples. The comprehension builds the list in one pass.
    return [
        [data_set[i][0] / 1280, data_set[i][1] / 720]
        for i in range(center_index - half, center_index + half + 1)
    ]

In [None]:
def create_training_sample_mirrored_X(data_set, window_size, center_index):
    """Build one normalised window centred on ``center_index``, mirrored in x.

    Rows ``center_index - h`` to ``center_index + h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in ascending order. The first value
    of each row becomes ``1 - value / 1280`` and the second ``value / 720``
    (1280 and 720 are presumably the stimulus width and height - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``; negative indices
            are not rejected and would wrap around.

    Returns:
        A list of ``[x, y]`` lists.
    """
    half = int(window_size / 2)
    # Fresh [x, y] lists are built instead of assigning into a slice of the row:
    # this never touches data_set (NumPy row slices are views) and also accepts
    # immutable rows such as tuples. The comprehension builds the list in one pass.
    return [
        [1 - (data_set[i][0] / 1280), data_set[i][1] / 720]
        for i in range(center_index - half, center_index + half + 1)
    ]

In [None]:
def create_training_sample_mirrored_Y(data_set, window_size, center_index):
    """Build one normalised window centred on ``center_index``, mirrored in y.

    Rows ``center_index - h`` to ``center_index + h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in ascending order. The first value
    of each row becomes ``value / 1280`` and the second ``1 - value / 720``
    (1280 and 720 are presumably the stimulus width and height - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``; negative indices
            are not rejected and would wrap around.

    Returns:
        A list of ``[x, y]`` lists.
    """
    half = int(window_size / 2)
    # Fresh [x, y] lists are built instead of assigning into a slice of the row:
    # this never touches data_set (NumPy row slices are views) and also accepts
    # immutable rows such as tuples. The comprehension builds the list in one pass.
    return [
        [data_set[i][0] / 1280, 1 - (data_set[i][1] / 720)]
        for i in range(center_index - half, center_index + half + 1)
    ]

In [None]:
def create_training_sample_mirrored_XY(data_set, window_size, center_index):
    """Build one normalised window centred on ``center_index``, mirrored in x and y.

    Rows ``center_index - h`` to ``center_index + h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in ascending order. The first value
    of each row becomes ``1 - value / 1280`` and the second ``1 - value / 720``
    (1280 and 720 are presumably the stimulus width and height - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``; negative indices
            are not rejected and would wrap around.

    Returns:
        A list of ``[x, y]`` lists.
    """
    half = int(window_size / 2)
    # Fresh [x, y] lists are built instead of assigning into a slice of the row:
    # this never touches data_set (NumPy row slices are views) and also accepts
    # immutable rows such as tuples. The comprehension builds the list in one pass.
    return [
        [1 - (data_set[i][0] / 1280), 1 - (data_set[i][1] / 720)]
        for i in range(center_index - half, center_index + half + 1)
    ]

In [None]:
def create_training_sample_reversed(data_set, window_size, center_index):
    """Build one normalised window centred on ``center_index`` in reverse row order.

    Rows ``center_index + h`` down to ``center_index - h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in descending order. The first value
    of each row is divided by 1280 and the second by 720 (presumably the
    stimulus width and height in pixels - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``; negative indices
            are not rejected and would wrap around.

    Returns:
        A list of ``[x, y]`` lists, last row of the window first.
    """
    half = int(window_size / 2)
    # Fresh [x, y] lists are built instead of assigning into a slice of the row:
    # this never touches data_set (NumPy row slices are views) and also accepts
    # immutable rows such as tuples. The comprehension builds the list in one pass.
    return [
        [data_set[i][0] / 1280, data_set[i][1] / 720]
        for i in range(center_index + half, center_index - half - 1, -1)
    ]

In [None]:
def create_training_sample_reversed_mirrored_X(data_set, window_size, center_index):
    """Build one normalised window in reverse row order, mirrored in x.

    Rows ``center_index + h`` down to ``center_index - h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in descending order. The first value
    of each row becomes ``1 - value / 1280`` and the second ``value / 720``
    (1280 and 720 are presumably the stimulus width and height - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``; negative indices
            are not rejected and would wrap around.

    Returns:
        A list of ``[x, y]`` lists, last row of the window first.
    """
    half = int(window_size / 2)
    # Fresh [x, y] lists are built instead of assigning into a slice of the row:
    # this never touches data_set (NumPy row slices are views) and also accepts
    # immutable rows such as tuples. The comprehension builds the list in one pass.
    return [
        [1 - (data_set[i][0] / 1280), data_set[i][1] / 720]
        for i in range(center_index + half, center_index - half - 1, -1)
    ]

In [None]:
def create_training_sample_reversed_mirrored_Y(data_set, window_size, center_index):
    """Build one normalised window in reverse row order, mirrored in y.

    Rows ``center_index + h`` down to ``center_index - h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in descending order. The first value
    of each row becomes ``value / 1280`` and the second ``1 - value / 720``
    (1280 and 720 are presumably the stimulus width and height - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``; negative indices
            are not rejected and would wrap around.

    Returns:
        A list of ``[x, y]`` lists, last row of the window first.
    """
    half = int(window_size / 2)
    # Fresh [x, y] lists are built instead of assigning into a slice of the row:
    # this never touches data_set (NumPy row slices are views) and also accepts
    # immutable rows such as tuples. The comprehension builds the list in one pass.
    return [
        [data_set[i][0] / 1280, 1 - (data_set[i][1] / 720)]
        for i in range(center_index + half, center_index - half - 1, -1)
    ]

In [None]:
def rotate_around_point(x, y, radians, origin=(0, 0)):
    """Rotate the point ``(x, y)`` by ``radians`` about ``origin``.

    The rotation applied to the offset from ``origin`` is
    ``[[cos, sin], [-sin, cos]]``; for example a quarter turn (pi / 2) about
    the default origin maps ``(1, 0)`` to ``(0, -1)``.

    Args:
        x: First coordinate of the point.
        y: Second coordinate of the point.
        radians: Rotation angle in radians.
        origin: ``(x, y)`` pair to rotate about.

    Returns:
        Tuple ``(qx, qy)`` with the rotated coordinates.
    """
    pivot_x, pivot_y = origin
    # Offset of the point relative to the pivot.
    dx = x - pivot_x
    dy = y - pivot_y
    c, s = math.cos(radians), math.sin(radians)
    return (
        pivot_x + c * dx + s * dy,
        pivot_y + -s * dx + c * dy,
    )

In [None]:
def create_training_sample_rotated(data_set, window_size, center_index, angle):
    """Build one normalised window centred on ``center_index``, rotated by ``angle``.

    Rows ``center_index - h`` to ``center_index + h`` (inclusive, with
    ``h = int(window_size / 2)``) are read in ascending order. The first two
    values of each row are rotated about ``(640, 360)`` with
    ``rotate_around_point`` and then divided by 1280 and 720 respectively
    (presumably the stimulus width and height, with the rotation centre at
    its middle - confirm).

    Args:
        data_set: Indexable sequence of rows; only entries 0 and 1 of a row are read.
        window_size: Nominal window length. ``2 * h + 1`` rows are produced,
            which equals ``window_size`` when it is odd.
        center_index: Index of the row in the middle of the window. The caller
            must keep the whole window inside ``data_set``.
        angle: Rotation angle in degrees.

    Returns:
        A list of rotated, normalised ``[x, y]`` lists, or the tuple
        ``(None, None)`` as soon as one rotated point falls outside ``[0, 1]``
        in either coordinate.
        NOTE(review): the failure value is a 2-tuple while success is a single
        list; kept unchanged so existing callers keep working.
    """
    half = int(window_size / 2)
    # Convert degrees to radians exactly once, outside the loop, so that every
    # point of the window is rotated by the same angle.
    radians = math.radians(angle)

    sample = []
    for i in range(center_index - half, center_index + half + 1):
        row = data_set[i]
        x, y = rotate_around_point(row[0], row[1], radians, (640, 360))
        x = x / 1280
        y = y / 720
        if x < 0 or x > 1 or y < 0 or y > 1:
            # The rotation moved this point outside the normalised range; reject the window.
            return None, None
        # Store the rotated, normalised point (not the raw row).
        sample.append([x, y])
    return sample

In [None]:
def process_data(data_set, window_size):
    """Slide a window over ``data_set`` and collect samples with their labels.

    Every index that has ``int(window_size / 2)`` rows on both sides becomes
    the centre of one plain window. If the centre row's label (entry 2 of the
    row) equals 2 or 3, six augmented windows are added as well, in this
    order: mirrored X, mirrored Y, mirrored XY, reversed, reversed + mirrored
    Y, reversed + mirrored X. (Labels 2 and 3 are presumably the classes that
    need oversampling - confirm against the dataset's label scheme.)

    Args:
        data_set: Sequence of rows ``[x, y, label]``.
        window_size: Window length forwarded to the window builders.

    Returns:
        Tuple ``(samples, labels)``: ``samples`` is a list of windows and
        ``labels`` a parallel list of one-element lists ``[label]``.
    """
    # Extra builders used only for labels 2 and 3; the order defines the output order.
    augmenters = (
        create_training_sample_mirrored_X,
        create_training_sample_mirrored_Y,
        create_training_sample_mirrored_XY,
        create_training_sample_reversed,
        create_training_sample_reversed_mirrored_Y,
        create_training_sample_reversed_mirrored_X,
    )
    half = int(window_size / 2)
    samples, labels = [], []

    for center in tqdm(range(half, len(data_set) - half)):
        label = data_set[center][2]

        # The un-augmented window is always emitted first.
        builders = (create_training_sample,)
        if label == 2 or label == 3:
            builders = builders + augmenters

        for build in builders:
            samples.append(build(data_set, window_size, center))
            labels.append([label])

    return samples, labels

In [None]:
# Run the preprocessing on the GazeCom ground-truth directory tree; this walks
# the tree, windows/augments the recordings and writes the .npy files to disk.
load_data('/bigpool/strohmfn/gazecom/ground_truth/')