Two types of attention scores:  
1. Experiment-level attention score: the attention over each QA pair of a single experiment, normalized to sum to 1;
2. Human-level attention score: the average of the attention scores of all participants for a specific QA pair.



In [97]:
import pandas as pd
import numpy as np
import json
import os
import pprint as pp

In [98]:
# Load files
# Root folder of the WebQAmGaze data; the per-participant fixation CSV files live
# in a sub-folder of it, so derive that path from the root instead of repeating it.
PRE_PROCESS_DATA_FOLDER = "./data/WebQAmGaze"
FIXATION_DATA_FOLDER = os.path.join(PRE_PROCESS_DATA_FOLDER, "pre_processed_data", "fixation_data_per_part")
experiments_config_file = 'target_experiments_IS_EN.json'
data_list = []
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open(os.path.join(PRE_PROCESS_DATA_FOLDER, experiments_config_file), encoding="utf-8") as f:
    data_list = json.load(f)
# Show how many experiment entries were loaded and what one entry looks like.
len(data_list), data_list[0]

(770,
 {'worker_id': 'A4W9APAHFWVLO',
  'set_name': 'mturk_EN_v13',
  'trial_5_condition': 'is',
  'trial_5_name': 'a_Kenya_1',
  'question_5_name': 'q_after_a_Kenya_1_qa_0',
  'text_name': 'a_Kenya_1',
  'question_name': 'q_after_a_Kenya_1_qa_0',
  'text': "On 28 February 2008, Kibaki and Odinga signed an agreement on the formation of a coalition government in which Odinga would become Kenya's second Prime Minister. Under the deal, the president would appoint cabinet ministers from both PNU and ODM camps depending on each party's strength in Parliament. The agreement stipulated that the cabinet would include a vice-president and two deputy Prime Ministers. After debates, it was passed by Parliament, the coalition would hold until the end of the current Parliament or if either of the parties withdraws from the deal before then.",
  'question': 'When did Kibaki and Odinga sing an agreement on the formation of government?'})

In [99]:
def normalize(matrix, order=1):
    """Scale ``matrix`` so that its norm of the given order equals 1.

    Parameters
    ----------
    matrix : numpy.ndarray
        Values to normalize. The norm is computed with ``np.linalg.norm``, so a
        1-D array is treated as a vector (order 1 is the sum of absolute values);
        for a 2-D array ``np.linalg.norm`` computes a matrix norm instead.
    order : int or float, optional
        Order of the norm, forwarded to ``np.linalg.norm``. Defaults to 1;
        pass 2 to normalize by the 2-norm.

    Returns
    -------
    norm : float
        The norm of the input, computed before scaling.
    matrix : numpy.ndarray
        ``matrix / norm`` when ``norm > 0``; otherwise the input object itself,
        unchanged, because dividing by a zero norm is not meaningful.
    """
    norm = np.linalg.norm(matrix, order)
    if norm > 0.0:
        # Division produces a new array; the caller's input is not modified.
        matrix = matrix / norm

    return norm, matrix

In [100]:
# Load the TRT and fixation-count vectors of the text read in each experiment.
#
# For every experiment entry, the matching participant's fixation CSV is looked
# up, the rows of the entry's text are selected, and the 'TRT' and 'FixCount'
# columns are normalized and stored on the entry as 'trt' and 'fixation'.
# Entries whose TRT or fixation vector has a zero norm are dropped.

# A participant file is shared by all of that participant's experiments, so parse
# each CSV only once and reuse the table (keyed by file path).
fixation_tables = {}
new_data_list = []
for entry in data_list:
    fixation_path = os.path.join(
        FIXATION_DATA_FOLDER,
        f"{entry['worker_id']}_{entry['set_name']}_fix_dict.csv")
    if fixation_path not in fixation_tables:
        fixation_tables[fixation_path] = pd.read_csv(fixation_path)
    participant_fixation_dictionary = fixation_tables[fixation_path]
    # Keep only the rows that belong to the text of this experiment.
    TRTs_and_FixationCounts_sen = participant_fixation_dictionary[
        participant_fixation_dictionary['text_id'] == entry['text_name']]
    # Replace NaN with zero in all columns (fillna returns a new frame, so the
    # cached table is left untouched).
    TRTs_and_FixationCounts_sen = TRTs_and_FixationCounts_sen.fillna(0)
    norm_1, matrix = normalize(TRTs_and_FixationCounts_sen['TRT'].to_numpy())
    entry['trt'] = matrix
    norm_2, matrix = normalize(TRTs_and_FixationCounts_sen['FixCount'].to_numpy())
    entry['fixation'] = matrix
    # Only keep experiments that have a non-zero signal for both measures.
    if norm_1 > 0.0 and norm_2 > 0.0:
        new_data_list.append(entry)
data_list = new_data_list
data_list[0]


{'worker_id': 'A4W9APAHFWVLO',
 'set_name': 'mturk_EN_v13',
 'trial_5_condition': 'is',
 'trial_5_name': 'a_Kenya_1',
 'question_5_name': 'q_after_a_Kenya_1_qa_0',
 'text_name': 'a_Kenya_1',
 'question_name': 'q_after_a_Kenya_1_qa_0',
 'text': "On 28 February 2008, Kibaki and Odinga signed an agreement on the formation of a coalition government in which Odinga would become Kenya's second Prime Minister. Under the deal, the president would appoint cabinet ministers from both PNU and ODM camps depending on each party's strength in Parliament. The agreement stipulated that the cabinet would include a vice-president and two deputy Prime Ministers. After debates, it was passed by Parliament, the coalition would hold until the end of the current Parliament or if either of the parties withdraws from the deal before then.",
 'question': 'When did Kibaki and Odinga sing an agreement on the formation of government?',
 'trt': array([0.1040153 , 0.28642447, 0.17820268, 0.02676864, 0.05239006,
    

In [101]:
# Bucket the experiment entries by the participant (worker) who produced them.
from collections import defaultdict

data_by_participants = defaultdict(list)
for data in data_list:
    worker_records = data_by_participants[data['worker_id']]
    worker_records.append(data)
# TODO check here. The number of workers does not match.
len(data_by_participants)

149

In [102]:
# Bucket the experiment entries by the text (sentence) that was read.
data_by_sentences = defaultdict(list)
for data in data_list:
    sentence_records = data_by_sentences[data['text_name']]
    sentence_records.append(data)
# TODO check here. The original note says "The number of workers does not match",
# although this cell counts distinct texts, not workers -- confirm which count is meant.
len(data_by_sentences)

71

In [103]:
# The human relative importance by sentences: for every text, the element-wise
# mean of the normalized TRT / fixation vectors of all entries for that text.
importance_by_sentences = []
print_flag = True  # set to False to silence the intermediate debug output
for key in data_by_sentences:
    data = data_by_sentences[key]
    size = len(data)
    if size != 0:
        # Accumulate into *copies* of the first entry's vectors. Binding the stored
        # arrays directly and then using += / /= would overwrite data[0]['trt'] and
        # data[0]['fixation'] in place, corrupting the per-experiment scores and
        # making this cell give different results when it is re-run.
        importance_vector_trt = np.array(data[0]['trt'], dtype=float)
        importance_vector_fixation = np.array(data[0]['fixation'], dtype=float)
        if print_flag:
            print(importance_vector_fixation[:5])
        for i in range(1, size):
            importance_vector_trt += data[i]['trt']
            importance_vector_fixation += data[i]['fixation']
            if print_flag:
                print(importance_vector_fixation[:5])
        # Turn the running sums into means over the entries of this text.
        importance_vector_trt /= size
        importance_vector_fixation /= size
        if print_flag:
            print(importance_vector_fixation[:5])
        entry = {'text_name': key,
                'importance_vector_trt': importance_vector_trt,
                'importance_vector_fixation': importance_vector_fixation}
        importance_by_sentences.append(entry)

        if print_flag:
            # Sanity check: a mean of vectors that each sum to 1 should sum to 1.
            print(np.sum(importance_vector_trt))
            print(np.sum(importance_vector_fixation))
print(len(importance_by_sentences))

[0.08695652 0.17391304 0.17391304 0.08695652 0.08695652]
[0.08695652 0.17391304 0.19193106 0.09596553 0.11398355]
[0.08695652 0.17391304 0.21632131 0.10816065 0.16276404]
[0.08695652 0.17391304 0.21632131 0.10816065 0.16276404]
[0.08695652 0.17391304 0.21632131 0.10816065 0.16276404]
[0.0173913  0.03478261 0.04326426 0.02163213 0.03255281]
1.0
1.0
[0.         0.00404858 0.01619433 0.01214575 0.02024291]
[0.         0.00404858 0.01619433 0.01214575 0.02024291]
[0.         0.01720648 0.02935223 0.03846154 0.03340081]
[0.         0.02605604 0.02935223 0.0473111  0.03340081]
[0.         0.02605604 0.02935223 0.0473111  0.03340081]
[0.         0.02605604 0.02935223 0.0473111  0.03340081]
[0.         0.03363179 0.02935223 0.0473111  0.04097657]
[0.         0.03363179 0.02935223 0.0473111  0.04097657]
[0.         0.05058095 0.02935223 0.0473111  0.05792572]
[0.00529101 0.05587195 0.02935223 0.0473111  0.06850773]
[0.00529101 0.05587195 0.02935223 0.0473111  0.06850773]
[0.00864671 0.05922766 