In [1]:
# Mount Google Drive inside the Colab runtime so the dataset stored there can be read.
# The mount is interactive: it asks for an authorization code before
# the drive becomes available under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# unzip the gaze history files
!unzip drive/My\ Drive/PFE/Gaze_txt_files

In [0]:
# create gaze dataset
import os
from sklearn import preprocessing
import random


def _to_int(text):
  """Return int(text), or None when `text` cannot be parsed as an integer."""
  try:
    return int(text)
  except ValueError:
    return None


def _read_gaze_points(txt_path):
  """Read one gaze history file and return its samples as a list of (x, y) float tuples.

  The file content is split on whitespace into records; each record is split
  on "," and its last two fields are converted to floats (x, then y).
  """
  with open(txt_path, 'r') as f:
    records = f.read().split()
  points = []
  for record in records:
    fields = record.split(",")
    points.append((float(fields[-2]), float(fields[-1])))
  return points


raw_data_path = "Gaze_txt_files"
# Keep only directories whose name, once 'p' is removed, is an integer id.
# Stray entries (e.g. ".ipynb_checkpoints", ".DS_Store") would otherwise crash
# the int() parsing in the loop below.
dir_list = sorted(
    name for name in os.listdir(raw_data_path)
    if os.path.isdir(os.path.join(raw_data_path, name))
    and _to_int(name.replace('p', '')) is not None
)
p_num = len(dir_list)
# p_data_list[pid - 1] maps a video id (int) to the scaled gaze array of person `pid`
p_data_list = [{} for _ in range(p_num)]
# fill p_data
max_t = 0
for dir_name in dir_list:
  person_gaze_txt_path = os.path.join(raw_data_path, dir_name)
  pid = int(dir_name.replace('p', ''))
  for video_name in os.listdir(person_gaze_txt_path):
    video_path = os.path.join(person_gaze_txt_path, video_name)
    # the video id is the part of the file name before the first "."
    video_id = _to_int(video_name.split(".")[0])
    if video_id is None or not os.path.isfile(video_path):
      # not a "<video id>.<ext>" gaze file: ignore it instead of crashing
      continue
    gaze_video_list = _read_gaze_points(video_path)
    if not gaze_video_list:
      # MinMaxScaler cannot be fitted on zero samples
      print('skipping empty gaze file: ', video_path)
      continue
    # normalize the two gaze coordinates before adding them to the dictionary;
    # the scaler is fitted per file, so each column is mapped to [0, 1] using
    # that file's own minimum and maximum
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(gaze_video_list)
    p_data_list[pid - 1][video_id] = x_scaled

In [43]:
# show some message
# The dataset cell stores viewer `pid` at index `pid - 1`, so index 1 is viewer 2
# (not the first viewer, as the previous labels claimed).
print('viewer data list size: ', len(p_data_list))
print('number of videos for viewer 2: ', len(p_data_list[1]))
# scaled gaze array of viewer 2 for video 179
sample_gazes = p_data_list[1][179]
print('number of frames in video 179 of viewer 2: ', len(sample_gazes))
print('maximal value in video gazes : ', sample_gazes.max())
print('minimal value in video gazes : ', sample_gazes.min())

viewer data list size:  45
number of videos for the first viewer:  36
number of frames in the first video of first viewer(179):  1479
maximal value in video gazes :  1.0
minimal value in video gazes :  0.0


In [0]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, concatenate
from keras.applications.inception_resnet_v2 import InceptionResNetV2

# ---------------------------------------------------------------------------
# The trajectory encoder module
# ---------------------------------------------------------------------------
# Define an input sequence of previous gazes (variable length, 2 values per step) and process it.
encoder_inputs = Input(shape=(None, 2))
# Define the first lstm layer; it returns the full output sequence plus its final states
lstm1 = LSTM(128, return_sequences=True, return_state=True)
lstm1_outputs, state_h, state_c = lstm1(encoder_inputs)
lstm1_states = [state_h, state_c]
# Define the second lstm layer (same width as the first so the states are compatible)
lstm2 = LSTM(128)
# Set up the second lstm, using `lstm1_states` as initial state.
trajectory_encoder_outputs = lstm2(lstm1_outputs,
                                initial_state=lstm1_states)

# ---------------------------------------------------------------------------
# The Saliency encoder module
# ---------------------------------------------------------------------------
# Define input for all spatial and temporal saliency maps and process it.
# NOTE(review): presumably 8 maps of 480x960 stacked along the height axis - confirm
# against the code that builds the saliency input.
saliency_inputs = Input(shape=(480*8, 960, 3))
# Use Inception-ResNet-V2 (ImageNet weights, no classification head) to extract saliency
# features for gaze prediction, followed by a global average pooling
saliency_encoder_outputs = InceptionResNetV2(weights='imagenet', include_top=False, pooling='avg')(saliency_inputs)

# ---------------------------------------------------------------------------
# Displacement Prediction Module
# ---------------------------------------------------------------------------
# Concatenate the outputs of the saliency encoder module and the trajectory encoder module
merged = concatenate([saliency_encoder_outputs, trajectory_encoder_outputs])
# Use two fully connected layers to estimate the displacement between the gaze point
# at time t + 1 and the gaze point at time t.
# The hidden layer needs a non-linearity: two stacked linear Dense layers are
# mathematically equivalent to a single linear layer.
x = Dense(1000, activation='relu')(merged)
# Linear output: the displacement is an unbounded 2-D regression target
displacement_outputs = Dense(2)(x)

# Create the proposed model
model = Model([encoder_inputs, saliency_inputs], displacement_outputs)

In [11]:
# Print a summary of the global architecture: for every layer its type,
# output shape, parameter count and the layers it is connected to.
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, None, 2)      0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           (None, 3840, 960, 3) 0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, None, 128),  67072       input_11[0][0]                   
__________________________________________________________________________________________________
inception_resnet_v2 (Model)     (None, 1536)         54336736    input_12[0][0]                   
____________________________________________________________________________________________

In [14]:
# Compile the model
from keras.optimizers import SGD
# NOTE(review): in Keras, `decay` is a per-update learning-rate decay, not an L2 weight
# decay. If 5e-4 was meant as weight decay, use kernel regularizers instead - confirm.
sgd = SGD(lr=0.1, decay=5e-4, momentum=0.9)
# The model ends in a linear Dense(2) that regresses a gaze displacement, so a
# regression loss is required. Categorical cross-entropy expects class probabilities
# and is meaningless (possibly NaN) on unbounded real-valued outputs.
model.compile(optimizer=sgd, loss='mean_squared_error')



