In [1]:
import numpy as np
import pandas as pd
from math import isnan

In [2]:
# Load the tab-separated file into a DataFrame; the path is relative, so it is
# resolved against the notebook's working directory.
data = pd.read_csv('test_data_17.tsv', sep='\t')

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# Number of rows read from the file, before any filtering.
print(len(data))

57892


In [4]:
def point_inside_polygon(x,y,poly):
    """Report whether the point (x, y) lies inside a polygon.

    Implements the "Ray Casting Method": each polygon edge is examined in
    turn and the answer is flipped every time the edge test succeeds.

    Args:
        x: x coordinate of the point to test
        y: y coordinate of the point to test
        poly: a list of (x,y) pairs defining the polygon

    Returns:
        True or False. An empty polygon always yields False.
    """
    vertex_count = len(poly)
    if vertex_count == 0:
        return False

    crossings_odd = False
    prev_x, prev_y = poly[0]
    # vertex_count + 1 steps: the index wraps back to 0 on the last step so the
    # closing edge (last vertex -> first vertex) is examined as well.
    for step in range(vertex_count + 1):
        cur_x, cur_y = poly[step % vertex_count]
        in_y_band = y > min(prev_y, cur_y) and y <= max(prev_y, cur_y)
        if in_y_band and x <= max(prev_x, cur_x):
            if prev_y != cur_y:
                # x coordinate at which the edge meets the horizontal line through y
                xinters = (y - prev_y) * (cur_x - prev_x) / (cur_y - prev_y) + prev_x
            if prev_x == cur_x or x <= xinters:
                crossings_odd = not crossings_odd
        prev_x, prev_y = cur_x, cur_y

    return crossings_odd

## AOI boundaries
# Each AOI is a polygon given as a list of (x, y) vertex tuples, which is the
# form point_inside_polygon takes for its `poly` argument.
# NOTE(review): presumably pixel coordinates in the same space as the
# 'GazePointX/Y (MCSpx)' columns -- confirm.
graph_polyin = [(276,228),(1012,228),(1012,852),(276,852)]
top_polyin = [(469,112), (811,112), (811,211), (469,211)]

def _datapoint_inside_aoi(x, y, polyin):
    """Helper that checks whether the point (x, y) is inside the AOI polygon polyin.

    Args:
        x: x coordinate of the point
        y: y coordinate of the point
        polyin: the AOI polygon in form of a list of (x,y) tuples

    Returns:
        A boolean for whether the point is inside the AOI or not
    """
    return bool(point_inside_polygon(x, y, polyin))

def get_chunk(data, ind, start, end):
    """Returns the index range of the records in data that fall within a time interval (start-end)

    The search scans forward from ``ind`` and stops at the first record whose
    timestamp is greater than ``end``, so it assumes ``data`` is ordered by
    increasing ``timestamp``.

    Args:
        data: a list of subsequent Fixations or Datapoints (any objects with a
            ``timestamp`` attribute)
        ind: an integer indicating the starting index in data for search, if not known
            should be set to zero.
        start: an integer indicating the start of interval in milliseconds
        end: an integer indicating the end of interval in milliseconds

    Returns:
        curr_ind: an integer indicating the index of the next record for search.
            This is useful if you are performing a series searches in a sequential manner.
            The method can start the next search from this index instead of beginning of the list.
        start_ind: an integer indicating the index of the first record (at or after ``ind``)
            whose timestamp is not smaller than ``start``
        end_ind: an integer one past the index of the last record whose timestamp is
            within the interval, so that ``data[start_ind:end_ind]`` is the chunk.
            Equal to ``start_ind`` when no record falls within the interval.
    """
    datalen = len(data)
    curr_ind = ind

    # Skip the records that come before the interval.
    while curr_ind < datalen and data[curr_ind].timestamp < start:
        curr_ind += 1
    start_ind = curr_ind

    # Consume the records that lie inside the interval (end is inclusive).
    while curr_ind < datalen and data[curr_ind].timestamp <= end:
        curr_ind += 1

    # Exclusive upper bound, because the last index is not inclusive in Python slices.
    end_ind = curr_ind
    return curr_ind, start_ind, end_ind

In [5]:
print("Filtering non-ScreenRec datapoints")
# Keep only the samples whose MediaName is 'ScreenRec'.
filtered_data = data[data["MediaName"] == 'ScreenRec']
print(len(filtered_data))
# An explicit empty string prints a blank line on both Python 2 and Python 3;
# a bare print() is read by Python 2 as "print the empty tuple" and shows "()".
print("")

Filtering non-ScreenRec datapoints
57505
()


In [6]:
print("Removing points without validity info")
# Drop the rows in which ValidityLeft or ValidityRight is missing (NaN).
filtered_data = filtered_data.dropna(subset=['ValidityLeft', 'ValidityRight'])
print(len(filtered_data))

Removing points without validity info
57337

So far we are consistent with EMDAT output


## Mean pupil size for the scene

In [10]:
# Zero-filled placeholders with one slot per row of filtered_data; the slots
# are overwritten position by position in a later cell.
pupil_sizes = pd.Series(np.zeros(len(filtered_data)))
distance_dps = pd.Series(np.zeros(len(filtered_data)))

In [11]:
def get_pupil_size(pupilleft, pupilright):
    """Combine the left and right pupil readings of one sample into one value.

    Args:
        pupilleft: left-eye reading, NaN when missing
        pupilright: right-eye reading, NaN when missing

    Returns:
        -1 when both readings are NaN, the only non-NaN reading when exactly
        one is NaN, otherwise the average of the two.
    """
    left_missing = isnan(pupilleft)
    right_missing = isnan(pupilright)
    if left_missing:
        return -1 if right_missing else pupilright
    if right_missing:
        return pupilleft
    return (pupilleft + pupilright) / 2.0

def get_distance(distanceleft, distanceright):
    """Combine the left and right distance readings of one sample into one value.

    Args:
        distanceleft: left-eye reading, NaN when missing
        distanceright: right-eye reading, NaN when missing

    Returns:
        -1 when both readings are NaN, the only non-NaN reading when exactly
        one is NaN, otherwise the average of the two.
    """
    usable = [reading for reading in (distanceleft, distanceright) if not isnan(reading)]
    if not usable:
        return -1
    if len(usable) == 1:
        return usable[0]
    return (usable[0] + usable[1]) / 2.0

In [12]:
# Fill the preallocated series positionally: enumerate supplies the 0-based
# position, which is independent of filtered_data's own index labels.
for i, (index, row) in enumerate(filtered_data.iterrows()):
    pupil = get_pupil_size(row["PupilLeft"], row["PupilRight"])
    distance = get_distance(row["DistanceLeft"], row["DistanceRight"])
    pupil_sizes[i] = pupil
    distance_dps[i] = distance


In [13]:
# -1 is the value get_pupil_size / get_distance return when both eyes are missing.
has_pupil = pupil_sizes != -1
# Distances are additionally required to be non-zero.
has_distance = (distance_dps != -1) & (distance_dps != 0)

valid_pupil_data = pupil_sizes[has_pupil]
valid_distance_data = distance_dps[has_distance]

print('number of all valid pupil size samples overall: %d' % len(valid_pupil_data))
print('number of all valid distance samples overall: %d' % len(valid_distance_data))

print("Pupil size mean so far on the whole dataset: %f" % valid_pupil_data.mean())
print("Distance mean so far on the whole dataset: %f" % valid_distance_data.mean())

number of all valid pupil size samples overall: 50777
number of all valid distance samples overall: 47115
Pupil size mean so far on the whole dataset: 2.264461
Distance mean so far on the whole dataset: 614.516281
Again, so far we have consistency with the original dataset


## Aggregated features for a scene


In [14]:
## File 16
#st_seg_1 = 59722
#ed_seg_1 = 113120

#st_seg_2 = 113120 
#ed_seg_2 = 230202

# Segment boundaries, compared against the RecordingTimestamp column.
st_seg_1 = 72727
ed_seg_1 = 158389
st_seg_2 = 158389
ed_seg_2 = 367172

# Series.between includes both endpoints by default on every pandas version;
# the boolean form `inclusive=True` is rejected by pandas 2.0 and later, so the
# argument is simply omitted.
# Because ed_seg_1 == st_seg_2 and both ends are inclusive, a sample stamped
# exactly at that boundary is selected into both segments.
filtered_data_seg1 = filtered_data[filtered_data['RecordingTimestamp'].between(st_seg_1, ed_seg_1)]
filtered_data_seg2 = filtered_data[filtered_data['RecordingTimestamp'].between(st_seg_2, ed_seg_2)]
print(len(filtered_data_seg1))
print(len(filtered_data_seg2))

10286
25071


In [15]:
print("Removing points with invalid gaze values for segment 1")
# Drop the rows in which either gaze coordinate is missing (NaN).
filtered_data_seg1 = filtered_data_seg1.dropna(subset=['GazePointY (MCSpx)', 'GazePointX (MCSpx)'])
print(len(filtered_data_seg1))

print("Removing points with invalid gaze values for segment 2")
# Same filter applied to the second segment.
filtered_data_seg2 = filtered_data_seg2.dropna(subset=['GazePointY (MCSpx)', 'GazePointX (MCSpx)'])
print(len(filtered_data_seg2))

Removing points with invalid gaze values for segment 1
9910
Removing points with invalid gaze values for segment 2
21477


In [16]:
# Zero-filled placeholders, one slot per remaining row of each segment; they are
# overwritten position by position in the following cells.
pupil_sizes_seg1 = pd.Series(np.zeros(len(filtered_data_seg1)))
distance_dps_seg1 = pd.Series(np.zeros(len(filtered_data_seg1)))

pupil_sizes_seg2 = pd.Series(np.zeros(len(filtered_data_seg2)))
distance_dps_seg2 = pd.Series(np.zeros(len(filtered_data_seg2)))

In [17]:
# Fill the segment-1 series positionally; enumerate supplies the 0-based
# position, independent of the index labels kept from the original frame.
for i, (index, row) in enumerate(filtered_data_seg1.iterrows()):
    pupil = get_pupil_size(row["PupilLeft"], row["PupilRight"])
    distance = get_distance(row["DistanceLeft"], row["DistanceRight"])
    pupil_sizes_seg1[i] = pupil
    distance_dps_seg1[i] = distance

In [18]:
# Fill the segment-2 series positionally; enumerate supplies the 0-based
# position, independent of the index labels kept from the original frame.
for i, (index, row) in enumerate(filtered_data_seg2.iterrows()):
    pupil = get_pupil_size(row["PupilLeft"], row["PupilRight"])
    distance = get_distance(row["DistanceLeft"], row["DistanceRight"])
    pupil_sizes_seg2[i] = pupil
    distance_dps_seg2[i] = distance

In [19]:
# Keep strictly positive values only: this drops the -1 "both eyes missing"
# sentinel as well as zeros.
valid_pupil_data_seg1 = pupil_sizes_seg1[pupil_sizes_seg1 > 0]
valid_distance_data_seg1 = distance_dps_seg1[distance_dps_seg1 > 0]
print('number of all valid pupil size samples for segment 1: %d' % len(valid_pupil_data_seg1))
print('number of all valid distance samples for segment 1: %d' % len(valid_distance_data_seg1))

print("Distance mean so far for the segment1: %f" % valid_distance_data_seg1.mean())

# Label names the segment explicitly, matching the three sibling messages.
print("Pupil mean so far for the segment1: %f" % valid_pupil_data_seg1.mean())


valid_pupil_data_seg2 = pupil_sizes_seg2[pupil_sizes_seg2 > 0]
valid_distance_data_seg2 = distance_dps_seg2[distance_dps_seg2 > 0]
print('number of all valid pupil size samples for segment 2: %d' % len(valid_pupil_data_seg2))
print('number of all valid distance samples for segment 2: %d' % len(valid_distance_data_seg2))

print("Distance mean so far for the segment2: %f" % valid_distance_data_seg2.mean())

print("Pupil mean so far for the segment2: %f" % valid_pupil_data_seg2.mean())


number of all valid pupil size samples for segment 1: 9910
number of all valid distance samples for segment 1: 9629
Distance mean so far for the segment1: 631.939086
Pupil mean so far for the segment: 2.354728
number of all valid pupil size samples for segment 2: 21477
number of all valid distance samples for segment 2: 19798
Distance mean so far for the segment2: 602.812719
Pupil mean so far for the segment2: 2.247576


## AOI Features:

In [37]:
def _rows_inside_aoi(frame, polyin):
    """Select the rows of frame whose gaze point lies inside the polygon polyin.

    A single boolean mask is built and applied once, instead of dropping the
    outside rows one at a time from the frame.

    Args:
        frame: DataFrame with 'GazePointX (MCSpx)' and 'GazePointY (MCSpx)' columns
        polyin: the AOI polygon in form of a list of (x,y) tuples

    Returns:
        (kept, deleted_count): an independent copy of the rows inside the AOI,
        and the number of rows of frame that were left out.
    """
    inside_mask = np.array(
        [
            _datapoint_inside_aoi(gaze_x, gaze_y, polyin)
            for gaze_x, gaze_y in zip(frame['GazePointX (MCSpx)'], frame['GazePointY (MCSpx)'])
        ],
        dtype=bool,  # explicit dtype keeps an empty mask boolean as well
    )
    kept = frame.loc[inside_mask].copy()
    return kept, len(frame) - len(kept)


print("removing points outside Graph AOI")
filtered_data_seg1_graph, deleted_count = _rows_inside_aoi(filtered_data_seg1, graph_polyin)

print(deleted_count)
print(len(filtered_data_seg1_graph))

print("removing points outside Graph AOI")
filtered_data_seg2_graph, deleted_count = _rows_inside_aoi(filtered_data_seg2, graph_polyin)

print(deleted_count)
print(len(filtered_data_seg2_graph))

print("removing points outside Top AOI")
filtered_data_seg1_top, deleted_count = _rows_inside_aoi(filtered_data_seg1, top_polyin)

print(deleted_count)
print(len(filtered_data_seg1_top))


print("removing points outside Top AOI")
filtered_data_seg2_top, deleted_count = _rows_inside_aoi(filtered_data_seg2, top_polyin)

print(deleted_count)
print(len(filtered_data_seg2_top))


removing points outside Graph AOI
2408
7502
removing points outside Graph AOI
4277
17200
removing points outside Top AOI
9225
685
removing points outside Top AOI
20076
1401


In [40]:
# Zero-filled placeholders, one slot per row of the matching AOI-filtered frame
# (segment 1 / segment 2 x Graph / Top AOI).
pupil_sizes_seg1_graph = pd.Series(np.zeros(len(filtered_data_seg1_graph)))
distances_seg1_graph = pd.Series(np.zeros(len(filtered_data_seg1_graph)))

pupil_sizes_seg1_top = pd.Series(np.zeros(len(filtered_data_seg1_top)))
distances_seg1_top = pd.Series(np.zeros(len(filtered_data_seg1_top)))

pupil_sizes_seg2_graph = pd.Series(np.zeros(len(filtered_data_seg2_graph)))
distances_seg2_graph = pd.Series(np.zeros(len(filtered_data_seg2_graph)))

pupil_sizes_seg2_top = pd.Series(np.zeros(len(filtered_data_seg2_top)))
distances_seg2_top = pd.Series(np.zeros(len(filtered_data_seg2_top)))

In [41]:
def get_pupils(acc, filt_data):
    """Write the combined pupil size of every row of filt_data into acc.

    Args:
        acc: container indexed by position 0..len(filt_data)-1; modified in place
        filt_data: DataFrame with "PupilLeft" and "PupilRight" columns

    Returns:
        The same acc object that was passed in.
    """
    for position, (_, sample) in enumerate(filt_data.iterrows()):
        acc[position] = get_pupil_size(sample["PupilLeft"], sample["PupilRight"])
    return acc

def get_distances(acc, filt_data):
    """Write the combined eye distance of every row of filt_data into acc.

    Uses get_distance (the distance counterpart of get_pupil_size) so that the
    distance columns are combined by the function written for them.

    Args:
        acc: container indexed by position 0..len(filt_data)-1; modified in place
        filt_data: DataFrame with "DistanceLeft" and "DistanceRight" columns

    Returns:
        The same acc object that was passed in.
    """
    for i, (index, row) in enumerate(filt_data.iterrows()):
        acc[i] = get_distance(row["DistanceLeft"], row["DistanceRight"])
    return acc
    

In [42]:
# get_pupils / get_distances fill the series passed as first argument and return
# that same object, so each `pipil_sizes_*` name below refers to the very same
# Series as the matching `pupil_sizes_*` placeholder.
# NOTE(review): `pipil_` looks like a misspelling of `pupil_`, but the later
# cells read these exact names, so they must be renamed together if at all.
pipil_sizes_seg1_graph = get_pupils(pupil_sizes_seg1_graph, filtered_data_seg1_graph)

pipil_sizes_seg1_top = get_pupils(pupil_sizes_seg1_top, filtered_data_seg1_top)

pipil_sizes_seg2_graph = get_pupils(pupil_sizes_seg2_graph, filtered_data_seg2_graph)

pipil_sizes_seg2_top = get_pupils(pupil_sizes_seg2_top, filtered_data_seg2_top)


distances_seg1_graph = get_distances(distances_seg1_graph, filtered_data_seg1_graph)

distances_seg1_top = get_distances(distances_seg1_top, filtered_data_seg1_top)

distances_seg2_graph = get_distances(distances_seg2_graph, filtered_data_seg2_graph)

distances_seg2_top = get_distances(distances_seg2_top, filtered_data_seg2_top)

In [43]:
# Sizes before dropping the invalid samples.
print(len(pipil_sizes_seg1_graph))
print(len(pipil_sizes_seg1_top))
print(len(pipil_sizes_seg2_graph))
print(len(pipil_sizes_seg2_top))
print(len(distances_seg1_graph))
print(len(distances_seg1_top))
print(len(distances_seg2_graph))
print(len(distances_seg2_top))

# Keep strictly positive values only: this removes the -1 "both eyes missing"
# sentinel as well as zeros.
pipil_sizes_seg1_graph = pipil_sizes_seg1_graph[pipil_sizes_seg1_graph > 0]
pipil_sizes_seg1_top = pipil_sizes_seg1_top[pipil_sizes_seg1_top > 0]
pipil_sizes_seg2_graph = pipil_sizes_seg2_graph[pipil_sizes_seg2_graph > 0]
pipil_sizes_seg2_top = pipil_sizes_seg2_top[pipil_sizes_seg2_top > 0]


distances_seg1_graph = distances_seg1_graph[distances_seg1_graph > 0]
distances_seg1_top = distances_seg1_top[distances_seg1_top > 0]
distances_seg2_graph = distances_seg2_graph[distances_seg2_graph > 0]
distances_seg2_top = distances_seg2_top[distances_seg2_top > 0]


# Blank separator line. A bare `print` only emits a newline on Python 2 and is
# a silent no-op on Python 3; print("") behaves the same on both.
print("")
print('Sizes after dropping')
print(len(pipil_sizes_seg1_graph))
print(len(pipil_sizes_seg1_top))
print(len(pipil_sizes_seg2_graph))
print(len(pipil_sizes_seg2_top))
print(len(distances_seg1_graph))
print(len(distances_seg1_top))
print(len(distances_seg2_graph))
print(len(distances_seg2_top))

7502
685
17200
1401
7502
685
17200
1401

Sizes after dropping
7502
685
17200
1401
7352
628
15854
1314


In [44]:
# Print order in this cell: seg1 graph, seg1 top, seg2 graph, seg2 top.
print("Means of pupil sizes")

print(pipil_sizes_seg1_graph.mean())
print(pipil_sizes_seg1_top.mean())
print(pipil_sizes_seg2_graph.mean())
print(pipil_sizes_seg2_top.mean())

print("Means of distances")

print(distances_seg1_graph.mean())
print(distances_seg1_top.mean())
print(distances_seg2_graph.mean())
print(distances_seg2_top.mean())

Means of pupil sizes
2.36399160224
2.24508029197
2.25951773256
2.17697359029
Means of distances
631.453409956
633.226417197
602.092546991
609.396659056


In [50]:
# Print order in this cell: seg1 graph, seg2 graph, seg1 top, seg2 top
# (grouped by AOI, unlike the means cell which groups by segment).
print("Maximums of pupil sizes")

print(pipil_sizes_seg1_graph.max())
print(pipil_sizes_seg2_graph.max())
print(pipil_sizes_seg1_top.max())

print(pipil_sizes_seg2_top.max())

print("Maximums of distances")

print(distances_seg1_graph.max())
print(distances_seg2_graph.max())
print(distances_seg1_top.max())
print(distances_seg2_top.max())

Maximums of pupil sizes
2.85
3.14
2.5999999999999996
2.415
Maximums of distances
650.27
653.11
654.61
647.98


In [59]:
# Print order in this cell: seg1 graph, seg2 graph, seg1 top, seg2 top
# (grouped by AOI, unlike the means cell which groups by segment).
print("stds of pupil sizes")

print(pipil_sizes_seg1_graph.std())
print(pipil_sizes_seg2_graph.std())

print(pipil_sizes_seg1_top.std())

print(pipil_sizes_seg2_top.std())

print("stds of distances")
print(distances_seg1_graph.std())
print(distances_seg2_graph.std())
print(distances_seg1_top.std())

print(distances_seg2_top.std())

stds of pupil sizes
0.114865342858
0.148383526922
0.124349597873
0.0626977900205
stds of distances
2.2638213712
14.3479850018
4.50552863263
10.965956009


In [51]:
# Pool the two segments per AOI. pd.concat is called without ignore_index, so
# each part keeps its own index labels; only the values matter for the
# statistics computed from these series.
pupil_sizes_graph_merged = pd.concat([pipil_sizes_seg1_graph, pipil_sizes_seg2_graph])
distances_graph_merged = pd.concat([distances_seg1_graph, distances_seg2_graph])

pupil_sizes_top_merged = pd.concat([pipil_sizes_seg1_top, pipil_sizes_seg2_top])
distances_top_merged = pd.concat([distances_seg1_top, distances_seg2_top])

In [54]:
# Maximum over both segments pooled together, reported per AOI.
print("maximum for merged features for graph:")
print("max pupil size for graph %f" % pupil_sizes_graph_merged.max())
print("max distance for graph %f" % distances_graph_merged.max())

print("maximum for merged features for top:")
print("max pupil size for top %f" % pupil_sizes_top_merged.max())
print("max distance for top %f" % distances_top_merged.max())

maximum for merged features for graph:
max pupil size for graph 3.140000
max distance for graph 653.110000
maximum for merged features for top:
max pupil size for top 2.600000
max distance for top 654.610000


In [57]:
# Mean over both segments pooled together, reported per AOI.
print("means for merged features for graph:")
print("mean pupil size for graph %f" % pupil_sizes_graph_merged.mean())
print("mean distance for graph %f" % distances_graph_merged.mean())

print("mean for merged features for top:")
print("mean pupil size for top %f" % pupil_sizes_top_merged.mean())
print("mean distance for top %f" % distances_top_merged.mean())

means for merged features for graph:
mean pupil size for graph 2.291246
mean distance for graph 611.394498
mean for merged features for top:
mean pupil size for top 2.199338
mean distance for top 617.102678


In [58]:
# Standard deviation over both segments pooled together, reported per AOI.
print("stds for merged features for graph:")
print("std pupil size for graph %f" % pupil_sizes_graph_merged.std())
print("std distance for graph %f" % distances_graph_merged.std())

print("std for merged features for top:")
print("std pupil size for top %f" % pupil_sizes_top_merged.std())
print("std distance for top %f" % distances_top_merged.std())

stds for merged features for graph:
std pupil size for graph 0.147124
std distance for graph 18.134483
std for merged features for top:
std pupil size for top 0.093465
std distance for top 14.567653
