In [None]:
import os
from dataclasses import dataclass, asdict
import pandas as pd
import json
from collections import Counter
from data_loading.data_loader import SciBot_DataLoader  # Provided From https://github.com/DFKI-Interactive-Machine-Learning/gazeRE-dataset 
from gaze_event_detection.idt import fixation_detection
from gaze_event_detection.saccade_detection import compute_saccade_aoi_features
from gaze_event_detection.convex_hull_area_features import compute_area_aoi_features

from statistics import mean, stdev
os.environ["OMP_NUM_THREADS"] = '1'
from sklearn.cluster import AffinityPropagation

In [None]:
@dataclass
class GraphDataClass:
    """Graph representation of one participant's scanpath over one stimulus.

    Instances are produced by ``extract_graph_data`` and turned into a CSV row
    with ``dataclasses.asdict``. The declaration order of the fields below is
    the positional order accepted by the generated ``__init__``.
    """
    __slots__ = ("user_id", "stimulus", "label", "edge_index", "node_features", "system_label", "gREL_label")
    # Participant identifier.
    user_id: str
    # Identifier of the stimulus the scanpath belongs to.
    stimulus: str
    # List of [source_node, target_node] pairs, one per transition in the scanpath.
    edge_index: list
    # One feature vector (list) per node.
    node_features: list
    # Relevance label attached to the graph.
    label: bool
    # System-side relevance label attached to the graph.
    system_label: bool
    # g-REL relevance label attached to the graph.
    gREL_label: str

In [None]:
@dataclass
class AOIDataClass:
    """AOI-level description of one participant's scanpath over one stimulus.

    This is the intermediate record handed to ``extract_graph_data``. The
    declaration order of the fields below is the positional order accepted by
    the generated ``__init__``.
    """
    __slots__ = ("user_id", "stimulus", "aoi_scanpath", "node_features", "label", "system_label", "gREL_label")
    # Participant identifier.
    user_id: str
    # Identifier of the stimulus the scanpath belongs to.
    stimulus: str
    # Relevance label attached to the scanpath.
    label: bool
    # Ordered list of visited AOI ids.
    aoi_scanpath: list
    # Mapping AOI id -> feature vector for that AOI.
    node_features: dict
    # System-side relevance label attached to the scanpath.
    system_label: bool
    # g-REL relevance label attached to the scanpath.
    gREL_label: str

In [None]:
# Parsed AOI-coordinate files keyed by scanpath graph representation type.
# assign_aoi() is called once per fixation, so caching here avoids re-opening and
# re-parsing the same JSON file for every single fixation.
# Re-run this cell to drop the cache if the JSON files change on disk.
_AOI_COORDINATES_CACHE = {}


def _load_aoi_coordinates(scanpath_graph_representation_type: str):
    """Return the parsed AOI-coordinate JSON of a representation, reading the file on first use only."""
    if scanpath_graph_representation_type not in _AOI_COORDINATES_CACHE:
        directory_path = f"../Data/Stimuli_Coordinates_Data/gREL_{scanpath_graph_representation_type}_AOI.json"
        with open(directory_path, 'r') as fp:
            _AOI_COORDINATES_CACHE[scanpath_graph_representation_type] = json.load(fp)
    return _AOI_COORDINATES_CACHE[scanpath_graph_representation_type]


def assign_aoi(scanpath_graph_representation_type: str, current_stimulus_id: str, x_point, y_point) -> int:
    """
    For each fixation we assign which AOI it falls in.
        The title takes number 0.
        The paragraphs take numbers from 1 until #Paragraphs.
        If the fixation is on an empty space or on the rating button it takes number 100.
    We discard the rating button since the original publication didn't take it into perspective.
    Based on the stimuli, we check that the fixation is within 700 and 1800 on the x axis to make sure it fell on
    text and not on empty space.
    This function does not deal with Cluster-based scanpath graph representations.

    :param scanpath_graph_representation_type: Selects the coordinate file
        "../Data/Stimuli_Coordinates_Data/gREL_<type>_AOI.json"; "Quartile" additionally splits the text area
        into a left and a right column.
    :param current_stimulus_id: Key of the stimulus inside the coordinate file.
    :param x_point: x coordinate of the fixation.
    :param y_point: y coordinate of the fixation, compared against the "y_bottom"/"y_top" bounds of each AOI.
    :return: The AOI id as an int, or 100 when the fixation does not fall inside any AOI.
    :raises KeyError: If the stimulus (or, for "Quartile", row "0"/"1") is missing from the coordinate file and
        the fixation lies inside the horizontal text range.
    """
    aoi_id = 100
    dict_coordinates = _load_aoi_coordinates(scanpath_graph_representation_type)
    if scanpath_graph_representation_type != "Quartile":
        if dict_coordinates is None:
            print("Wrong Coordinates")
        elif 700 <= x_point <= 1800:
            # Both y bounds are inclusive and the loop deliberately does not stop at the first hit:
            # when a point lies on a boundary shared by two AOIs, the AOI listed later in the file wins.
            for aoi_key, aoi_bounds in dict_coordinates[current_stimulus_id].items():
                if aoi_bounds["y_bottom"] <= y_point <= aoi_bounds["y_top"]:
                    aoi_id = aoi_key
    else:
        # The Quartile-based scanpath graph representations were split based on x coordinates as well:
        # column 0 is [700, 1250), column 1 is [1250, 1800); both y bounds are half-open.
        if 700 <= x_point < 1250:
            column = 0
        elif 1250 <= x_point < 1800:
            column = 1
        else:
            column = None
        if column is not None:
            stimulus_rows = dict_coordinates[current_stimulus_id]
            if stimulus_rows["0"]["y_bottom"] <= y_point < stimulus_rows["0"]["y_top"]:
                aoi_id = column  # row "0": AOI 0 (left) or 1 (right)
            elif stimulus_rows["1"]["y_bottom"] <= y_point < stimulus_rows["1"]["y_top"]:
                aoi_id = 2 + column  # row "1": AOI 2 (left) or 3 (right)
    return int(aoi_id)

In [None]:
def get_stimulus_node_size(scanpath_graph_representation_type: str, current_stimulus_id: str) -> int:
    """
    Number of AOIs (graph nodes) of a single stimulus.

    :param scanpath_graph_representation_type: Scanpath graph representation whether Paragraph, Line, or Quartile.
    :param current_stimulus_id: Key of the stimulus inside the AOI coordinate JSON file.
    :return: 4 for the Quartile representation (no file is read); otherwise the number of AOI entries stored
        for the stimulus in "../Data/Stimuli_Coordinates_Data/gREL_<type>_AOI.json".
    """
    # Quartile graphs always have four nodes, independent of the stimulus.
    if scanpath_graph_representation_type == "Quartile":
        return 4
    coordinates_file = f"../Data/Stimuli_Coordinates_Data/gREL_{scanpath_graph_representation_type}_AOI.json"
    with open(coordinates_file, 'r') as handle:
        coordinates_by_stimulus = json.load(handle)
    return len(coordinates_by_stimulus[current_stimulus_id])

In [None]:
def get_maximum_node_size(scanpath_graph_representation_type: str):
    """
    Largest number of AOIs any stimulus has for the given representation.

    :param scanpath_graph_representation_type: Selects the coordinate file
        "../Data/Stimuli_Coordinates_Data/gREL_<type>_AOI.json".
    :return: The maximum number of AOI entries over all stimuli in that file, or 0 when the file lists no stimuli.
    """
    coordinates_file = f"../Data/Stimuli_Coordinates_Data/gREL_{scanpath_graph_representation_type}_AOI.json"
    with open(coordinates_file, 'r') as handle:
        coordinates_by_stimulus = json.load(handle)
    aoi_counts = (len(coordinates_by_stimulus[stimulus_key]) for stimulus_key in coordinates_by_stimulus)
    return max(aoi_counts, default=0)

In [None]:
def compute_path(scanpath_graph_representation_type: str, aoi_list: list) -> list:
    """
    Turn the per-fixation AOI sequence into the sequence of visited AOIs.

    Runs of consecutive fixations on the same AOI are collapsed into a single visit. For every representation
    except "Cluster", visits to AOI 100 (empty space) are dropped afterwards; the remaining visits are not
    collapsed again, so the same AOI can appear twice in a row when an empty-space visit separated them.

    :param scanpath_graph_representation_type: Representation name; only "Cluster" keeps AOI 100.
    :param aoi_list: AOI id of every fixation, in temporal order.
    :return: The AOI visit sequence.
    """
    # Keep a fixation's AOI whenever the next fixation lands on a different AOI ...
    visits = [current for current, following in zip(aoi_list, aoi_list[1:]) if current != following]
    # ... and always keep the AOI of the final fixation.
    if len(aoi_list) > 0:
        visits.append(aoi_list[-1])
    if scanpath_graph_representation_type == "Cluster":
        return visits
    # We don't take the empty space transitions into account
    return [visit for visit in visits if visit != 100]

In [None]:
def extract_graph_data(dataclass_aoi_data: AOIDataClass) -> GraphDataClass:
    """
    Convert an AOI-level scanpath record into its graph form.

    Every pair of consecutive entries in the AOI scanpath becomes one [source, target] edge, and the node
    feature vectors are taken from ``node_features`` in the dictionary's iteration order. The GNN expects
    (maximum node ID = number of nodes - 1) to be the maximum number that appears in the edges, so
    ``node_features`` has to contain an entry for every AOI, including AOIs that received no fixation.

    :param dataclass_aoi_data: Record holding the scanpath, node features and labels of one user and stimulus.
    :return: Graph record with the same ids and labels, plus ``edge_index`` and ``node_features`` lists.
    """
    visited_aois = dataclass_aoi_data.aoi_scanpath
    # Pair each visit with the one that follows it; a scanpath shorter than two visits has no edges.
    transitions = [[source, target] for source, target in zip(visited_aois, visited_aois[1:])]
    feature_vectors = list(dataclass_aoi_data.node_features.values())
    return GraphDataClass(
        user_id=dataclass_aoi_data.user_id,
        stimulus=dataclass_aoi_data.stimulus,
        label=dataclass_aoi_data.label,
        edge_index=transitions,
        node_features=feature_vectors,
        system_label=dataclass_aoi_data.system_label,
        gREL_label=dataclass_aoi_data.gREL_label,
    )

In [None]:
"""
The eye tracking data and the data loader are both from https://github.com/DFKI-Interactive-Machine-Learning/gazeRE-dataset 
The data folder was renamed Eye_Tracking_Data to avoid confusion since we have other data as well. 
"""
# Load the g-REL reading recordings once; the graph-generation cells below iterate over `items`.
# Participants to include. A02 and A05 do not appear in this list.
users_ids = ["A01", "A03", "A04", "A06", "A07", "A08", "A09", "A10", "A11", "A12", "A13",
             "B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B09", "B10", "B11", "B12", "B13"]
# Only the gaze data of the g-REL reading task is requested (rating_task and training_data are switched off).
dataloader = SciBot_DataLoader(data_dir="../Data/Eye_Tracking_Data", include_users=users_ids,
                               gaze_data=True, reading_task=True, rating_task=False, training_data=False, gREL=True)
# (user, user_data) pairs; later cells index user_data by document id.
items = dataloader.grel_reading.items()

In [None]:
# Build one graph dataset (CSV) per combination of the graph data parameters
#     scanpath_graph_representation: "Paragraph", "Line", or "Quartile"
#     only_two_node_features: True, False
# NOTE: zero padding is not implemented in this cell. Every graph gets exactly the AOIs of its own stimulus
# (get_stimulus_node_size), so there is no `with_zero_padding` sweep: it would only recompute and overwrite
# the same files.

# Feature names in the exact order in which they are written into each node feature vector.
fixation_feature_keys = ["fixation_duration_sum", "fixation_duration_mean", "fixation_duration_std"]
saccade_feature_keys = ["scan_distance_h", "scan_distance_v", "scan_distance_euclid", "scan_hv_ratio",
                        "avg_sacc_length", "scan_speed_h", "scan_speed_v", "scan_speed"]
area_feature_keys = ["box_area", "box_area_per_time", "fixns_per_box_area", "hull_area_per_time", "fixns_per_hull_area"]
two_feature_keys = ["fixation_counts", "fixation_duration_sum"]
output_columns = ["user_id", "stimulus", "label", "edge_index", "node_features", "system_label", "gREL_label"]

# The stimulus directory does not change while the cell runs, so list it once.
stimulus_filenames = os.listdir("../Data/Eye_Tracking_Data/stimuli/g-REL/")


def _summarise_fixation_durations(durations: list) -> tuple:
    """Return (sum, mean, sample standard deviation) of the durations; all zero for an AOI without fixations."""
    if not durations:
        return 0, 0, 0
    # The sample standard deviation is undefined for a single fixation, so it is reported as 0.
    duration_std = stdev(durations) if len(durations) != 1 else 0
    return sum(durations), mean(durations), duration_std


for scanpath_graph_representation in ["Paragraph", "Line", "Quartile"]:
    for only_two_node_features in [True, False]:
        # Rows are collected in a list and turned into a DataFrame once; growing a DataFrame with
        # pd.concat inside the loop copies all previous rows on every iteration.
        graph_records = []
        num_node_features = 0
        for user, user_data in items:
            for filename in stimulus_filenames:
                # The document id drops the last 6 characters of the file name, the stimulus id the last 4.
                document_id = filename[0:-6]
                stimulus = filename[0:-4]
                stimulus_number_of_aoi = get_stimulus_node_size(scanpath_graph_representation_type=scanpath_graph_representation, current_stimulus_id=stimulus)
                list_text_aois = list(range(stimulus_number_of_aoi))
                # 100 is the AOI ID for empty spaces; it is tracked while detecting fixations but never becomes a node.
                list_stimulus_aois = list_text_aois + [100]
                dict_aoi_fixation_durations = {stimulus_aoi: [] for stimulus_aoi in list_stimulus_aois}

                df_gaze_data = user_data[document_id]["dataframe"]
                relevance_label = user_data[document_id]["perceived_relevance"][0]
                system_relevance = user_data[document_id]["system_relevance"][0]
                gREL_relevance = user_data[document_id]["g-rel_relevance"][0]
                t = df_gaze_data["timestamp"].values
                x = df_gaze_data["gaze_x"].values
                y = df_gaze_data["gaze_y"].values
                y_abs = df_gaze_data["gaze_y_abs"].values

                # Fixations are detected on (x, y); their position is reported with the absolute y coordinate.
                events = fixation_detection(t=t, x=x, y=y)
                list_fixations = []
                list_aoi_points = []
                for (start, end) in events:
                    x_position = mean(x[start:end])
                    y_position = mean(y_abs[start:end])
                    fixation_duration = t[end] - t[start]
                    aoi = assign_aoi(scanpath_graph_representation_type=scanpath_graph_representation, current_stimulus_id=stimulus, x_point=x_position, y_point=y_position)
                    list_aoi_points.append(aoi)
                    list_fixations.append({"start_time": t[start], "end_time": t[end], "gaze_x": x_position,
                                           "gaze_y": y_position, "duration": fixation_duration, "aoi": aoi})
                    dict_aoi_fixation_durations[aoi].append(fixation_duration)
                aoi_scanpath = compute_path(scanpath_graph_representation_type=scanpath_graph_representation, aoi_list=list_aoi_points)

                # Compute Saccade Features
                dict_saccade_features_per_aoi = compute_saccade_aoi_features(list_fixations=list_fixations, list_stimulus_aois=list_stimulus_aois)

                # Combine Fixation and Saccade Features (empty spaces, AOI 100, are left out)
                dict_features_per_aoi = {}
                for stimulus_aoi in list_text_aois:
                    aoi_durations = dict_aoi_fixation_durations[stimulus_aoi]
                    duration_sum, duration_mean, duration_std = _summarise_fixation_durations(aoi_durations)
                    # Every fixation appended exactly one duration to its AOI, so the list length is the fixation count.
                    aoi_features = {"fixation_counts": len(aoi_durations),
                                    "fixation_duration_sum": duration_sum,
                                    "fixation_duration_mean": duration_mean,
                                    "fixation_duration_std": duration_std}
                    for feature_key in saccade_feature_keys:
                        aoi_features[feature_key] = dict_saccade_features_per_aoi[stimulus_aoi][feature_key]
                    dict_features_per_aoi[stimulus_aoi] = aoi_features

                # Compute Area Features (not available for the Line representation)
                dict_area_features_per_aoi = None
                if scanpath_graph_representation != "Line":
                    dict_area_features_per_aoi = compute_area_aoi_features(list_fixations, list_stimulus_aois, dict_features_per_aoi, stimulus_area=1)

                # Create Node Features List
                dict_node_features = {}
                if only_two_node_features:
                    for stimulus_aoi in list_text_aois:
                        dict_node_features[stimulus_aoi] = [dict_features_per_aoi[stimulus_aoi][feature_key] for feature_key in two_feature_keys]
                    num_node_features = 2
                else:
                    for stimulus_aoi in list_text_aois:
                        feature_vector = [dict_features_per_aoi[stimulus_aoi]["fixation_counts"]]
                        feature_vector += [dict_features_per_aoi[stimulus_aoi][feature_key] for feature_key in fixation_feature_keys]
                        feature_vector += [dict_features_per_aoi[stimulus_aoi][feature_key] for feature_key in saccade_feature_keys]
                        if scanpath_graph_representation != "Line":
                            feature_vector += [dict_area_features_per_aoi[stimulus_aoi][feature_key] for feature_key in area_feature_keys]
                        dict_node_features[stimulus_aoi] = feature_vector
                    # NOTE(review): Line vectors hold 12 values (no area features) but the file is still
                    # labelled 17; the label is kept so that existing readers of the CSV names keep working.
                    num_node_features = 17

                scanpath_aoi_dataclass = AOIDataClass(stimulus=document_id, aoi_scanpath=aoi_scanpath, user_id=user,
                                                      node_features=dict_node_features,
                                                      label=relevance_label,
                                                      system_label=system_relevance,
                                                      gREL_label=gREL_relevance)
                user_graph_data = asdict(extract_graph_data(dataclass_aoi_data=scanpath_aoi_dataclass))
                graph_records.append(user_graph_data)
        # `columns` fixes the column order of the CSV independently of the dataclass field order.
        output_df = pd.DataFrame(graph_records, columns=output_columns)
        output_df.to_csv(f"../Data/Graph_Data/gREL_{scanpath_graph_representation}_{num_node_features}_Features.csv", index=False)

In [None]:
"""
Define graph data parameters
    scanpath_graph_representation: "Cluster"
    only_two_node_features: True, False
"""
# Read by the Cluster generation cell below: passed to compute_path (which keeps AOI id 100 for
# "Cluster") and used in the output file name.
scanpath_graph_representation = "Cluster"

In [None]:
# Build the Cluster-based graph datasets: the nodes of each graph are the Affinity Propagation clusters of
# one participant's fixations on one stimulus, so the number of nodes differs from graph to graph.

# Feature names in the exact order in which they are written into each node feature vector.
cluster_fixation_feature_keys = ["fixation_duration_sum", "fixation_duration_mean", "fixation_duration_std"]
cluster_saccade_feature_keys = ["scan_distance_h", "scan_distance_v", "scan_distance_euclid", "scan_hv_ratio",
                                "avg_sacc_length", "scan_speed_h", "scan_speed_v", "scan_speed"]
cluster_area_feature_keys = ["box_area", "box_area_per_time", "fixns_per_box_area", "hull_area_per_time", "fixns_per_hull_area"]
cluster_output_columns = ["user_id", "stimulus", "label", "edge_index", "node_features", "system_label", "gREL_label"]

# The stimulus directory does not change while the cell runs, so list it once.
cluster_stimulus_filenames = os.listdir("../Data/Eye_Tracking_Data/stimuli/g-REL/")

for only_two_node_features in [True, False]:
    # Rows are collected in a list and turned into a DataFrame once; growing a DataFrame with
    # pd.concat inside the loop copies all previous rows on every iteration.
    graph_records = []
    num_node_features = 0
    for user, user_data in items:
        for filename in cluster_stimulus_filenames:
            # The document id drops the last 6 characters of the file name, the stimulus id the last 4.
            document_id = filename[0:-6]
            stimulus = filename[0:-4]
            df_gaze_data = user_data[document_id]["dataframe"]
            relevance_label = user_data[document_id]["perceived_relevance"][0]
            system_relevance = user_data[document_id]["system_relevance"][0]
            gREL_relevance = user_data[document_id]["g-rel_relevance"][0]
            t = df_gaze_data["timestamp"].values
            x = df_gaze_data["gaze_x"].values
            y = df_gaze_data["gaze_y"].values
            y_abs = df_gaze_data["gaze_y_abs"].values
            events = fixation_detection(t=t, x=x, y=y)
            fixations = {"x": [], "y": []}
            fixations_extra_info = {"start_time": [], "end_time": []}
            fixation_duration_list = []
            for (start, end) in events:
                x_position = mean(x[start:end])
                y_position = mean(y_abs[start:end])
                if 700 <= x_position <= 1800:  # To remove the rating button and empty space
                    fixation_duration = t[end] - t[start]
                    fixations["x"].append(x_position)
                    fixations["y"].append(y_position)
                    fixations_extra_info["start_time"].append(t[start])
                    fixations_extra_info["end_time"].append(t[end])
                    fixation_duration_list.append(fixation_duration)
            fixations_df = pd.DataFrame(fixations)
            # random_state is fixed so that the clustering is reproducible.
            clustering = AffinityPropagation(random_state=0)
            list_aoi_points = clustering.fit_predict(fixations_df)
            fixations_df["cluster"] = list_aoi_points
            # Rename the clusters so that their ids follow a consistent spatial order:
            # descending y of the cluster centre, ties broken by ascending x (the second sort is stable).
            cluster_centers = clustering.cluster_centers_
            cluster_centers_df = pd.DataFrame(cluster_centers, columns=["x", "y"])
            cluster_centers_df = cluster_centers_df.sort_values(by=["x"])  # Sort the clusters
            cluster_centers_df = cluster_centers_df.sort_values(by=["y"], kind="mergesort", ascending=False)
            cluster_centers_df["old_index"] = cluster_centers_df.index  # Save the old labels
            cluster_centers_df = cluster_centers_df.reset_index()
            cluster_centers_df["new_index"] = cluster_centers_df.index  # Save the new labels
            # Change the node labels
            fixations_df["cluster"] = fixations_df["cluster"].replace(cluster_centers_df["old_index"].values.tolist(),
                                                                      cluster_centers_df["new_index"].values.tolist())
            fixations_df["duration"] = fixation_duration_list
            fixations_df["start_time"] = fixations_extra_info["start_time"]
            fixations_df["end_time"] = fixations_extra_info["end_time"]

            aoi_scanpath = compute_path(scanpath_graph_representation_type=scanpath_graph_representation, aoi_list=fixations_df["cluster"].tolist())

            # Fixation duration statistics per cluster; the std of a single-fixation cluster is NaN and is set to 0.
            aoi_duration_df_fixation_features_sum = fixations_df.groupby(["cluster"]).duration.sum().reset_index().rename(columns={"duration": "sum"})
            aoi_duration_df_fixation_features_mean = fixations_df.groupby(["cluster"]).duration.mean().reset_index().rename(columns={"duration": "mean"})
            aoi_duration_df_fixation_features_std = fixations_df.groupby(["cluster"]).duration.std().reset_index().rename(columns={"duration": "std"}).fillna(0)
            aoi_duration_df_fixation_features = pd.merge(aoi_duration_df_fixation_features_sum, aoi_duration_df_fixation_features_mean, on="cluster")
            aoi_duration_df_fixation_features = pd.merge(aoi_duration_df_fixation_features, aoi_duration_df_fixation_features_std, on="cluster")
            dict_fixation_features_per_aoi = {k: {"fixation_duration_sum": s, "fixation_duration_mean":m, "fixation_duration_std":std} for k, s, m, std in zip(aoi_duration_df_fixation_features.cluster, aoi_duration_df_fixation_features["sum"], aoi_duration_df_fixation_features["mean"], aoi_duration_df_fixation_features["std"])}

            # Edit the format to fit the feature extraction functions
            fixations_df = fixations_df.rename(columns={"x": "gaze_x", "y": "gaze_y", "cluster": "aoi"})
            list_fixations = fixations_df.to_dict("records")
            # The relabelling above is a permutation, so the set of ids is unchanged. Sorting guarantees that
            # the i-th node feature vector written below belongs to cluster id i, which the edges refer to.
            list_stimulus_aois = sorted(set(list_aoi_points))
            dict_fixation_counts_per_aoi = fixations_df["aoi"].value_counts().to_dict()

            # Compute Saccade Features
            dict_saccade_features_per_aoi = compute_saccade_aoi_features(list_fixations=list_fixations, list_stimulus_aois=list_stimulus_aois)

            # Combine Features. No id is removed here: fixations outside the text columns were already
            # filtered out above, and in this representation 100 would be an ordinary cluster id.
            dict_features_per_aoi = {}
            for stimulus_aoi in list_stimulus_aois:
                aoi_features = {"fixation_counts": dict_fixation_counts_per_aoi[stimulus_aoi]}
                for feature_key in cluster_fixation_feature_keys:
                    aoi_features[feature_key] = dict_fixation_features_per_aoi[stimulus_aoi][feature_key]
                for feature_key in cluster_saccade_feature_keys:
                    aoi_features[feature_key] = dict_saccade_features_per_aoi[stimulus_aoi][feature_key]
                dict_features_per_aoi[stimulus_aoi] = aoi_features

            # Compute Area Features
            dict_area_features_per_aoi = compute_area_aoi_features(list_fixations, list_stimulus_aois, dict_features_per_aoi, stimulus_area=1)

            # Create Node Features List
            dict_node_features = {}
            if only_two_node_features:
                for stimulus_aoi in list_stimulus_aois:
                    dict_node_features[stimulus_aoi] = [dict_features_per_aoi[stimulus_aoi]["fixation_counts"], dict_features_per_aoi[stimulus_aoi]["fixation_duration_sum"]]
                num_node_features = 2
            else:
                for stimulus_aoi in list_stimulus_aois:
                    feature_vector = [dict_features_per_aoi[stimulus_aoi]["fixation_counts"]]
                    feature_vector += [dict_features_per_aoi[stimulus_aoi][feature_key] for feature_key in cluster_fixation_feature_keys]
                    feature_vector += [dict_features_per_aoi[stimulus_aoi][feature_key] for feature_key in cluster_saccade_feature_keys]
                    feature_vector += [dict_area_features_per_aoi[stimulus_aoi][feature_key] for feature_key in cluster_area_feature_keys]
                    dict_node_features[stimulus_aoi] = feature_vector
                num_node_features = 17

            scanpath_aoi_dataclass = AOIDataClass(stimulus=document_id, aoi_scanpath=aoi_scanpath, user_id=user,
                                                  node_features=dict_node_features,
                                                  label=relevance_label,
                                                  system_label=system_relevance,
                                                  gREL_label=gREL_relevance)
            user_graph_data = asdict(extract_graph_data(dataclass_aoi_data=scanpath_aoi_dataclass))
            graph_records.append(user_graph_data)
    # `columns` fixes the column order of the CSV independently of the dataclass field order.
    output_df = pd.DataFrame(graph_records, columns=cluster_output_columns)
    output_df.to_csv(f"../Data/Graph_Data/gREL_{scanpath_graph_representation}_Affinity_{num_node_features}_Features.csv", index=False)