In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from DataGeneration.CNN_Data_Loader import SciBot_DataLoader

In [None]:
def generate_scanpath_grel(df, path):
    """Render the fixations in ``df`` as a scanpath image and save it to ``path``.

    Every pair of consecutive rows is joined by a straight line (a saccade),
    coloured along the ``winter`` colormap by row position. Each fixation whose
    ``duration`` is at least 110 is drawn as a marker whose shape, colour and
    size depend on its duration bucket; shorter fixations get no marker.
    The canvas is 25.6 x 14.4 inches at 100 DPI, black, with the axes limited
    to 0-2560 horizontally and 0-1440 vertically.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per fixation, in viewing order, with the columns
        ``avg_gaze_x``, ``avg_gaze_y`` and ``duration``.
    path : str or path-like
        Destination handed to ``Figure.savefig``.

    Raises
    ------
    KeyError
        If one of the required columns is missing. This is raised before any
        figure is created.
    """
    # Marker style per fixation-duration bucket; ranges are half-open [low, high).
    fixation_levels = {
        'Level 1': {'duration_range': (110, 250), 'marker': 'o', 'color': 'red', 'size': 6},
        'Level 2': {'duration_range': (250, 400), 'marker': '*', 'color': 'purple', 'size': 12},
        'Level 3': {'duration_range': (400, 550), 'marker': 'p', 'color': 'yellow', 'size': 18},
        'Level 4': {'duration_range': (550, np.inf), 'marker': 'x', 'color': 'white', 'size': 24}
    }

    # Pull the columns out once: this avoids building a Series per row inside
    # the loops and fails on a missing column before a figure is allocated.
    xs = df['avg_gaze_x'].to_numpy()
    ys = df['avg_gaze_y'].to_numpy()
    durations = df['duration'].to_numpy()
    n_fixations = len(df)

    fig, ax = plt.subplots(figsize=(25.6, 14.4), dpi=100, facecolor='black')
    try:
        ax.axis('off')  # Hide axis lines, ticks and labels

        # Saccades: one straight segment between each pair of consecutive fixations
        for i in range(n_fixations - 1):
            ax.plot(xs[i:i + 2], ys[i:i + 2], color=plt.cm.winter(i / n_fixations))

        # Fixations: one marker each, drawn in row order so later ones stay on top
        for x, y, duration in zip(xs, ys, durations):
            for properties in fixation_levels.values():
                low, high = properties['duration_range']
                if low <= duration < high:
                    ax.plot(x, y, marker=properties['marker'], color=properties['color'],
                            markersize=properties['size'])
                    break  # buckets do not overlap, so the first match is the only one

        ax.set_aspect('equal', adjustable='box')
        # NOTE(review): the title uses the default text colour, so it is probably not
        # readable on the black canvas; kept so the saved image is unchanged - confirm.
        ax.set_title('Scanpath')
        ax.set_xlim(0, 2560)
        ax.set_ylim(0, 1440)

        # facecolor must be repeated here so the saved file keeps the black background
        fig.savefig(path, facecolor='black', bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure; this function is called once per recording.
        plt.close(fig)

In [None]:
def generate_scanpath_nq(df, path):
    """Render the fixations in ``df`` as a scanpath image and save it to ``path``.

    Two neighbouring rows are joined by a straight line (a saccade) only when
    their ``fixation_id`` values are consecutive, i.e. differ by exactly 1;
    a gap in the ids leaves the two fixations unconnected. Lines are coloured
    along the ``winter`` colormap by row position. Each fixation whose
    ``duration`` is at least 110 is drawn as a marker whose shape, colour and
    size depend on its duration bucket; shorter fixations get no marker.
    The canvas is 25.6 x 14.4 inches at 100 DPI, black, with the axes limited
    to 0-2560 horizontally and 0-1440 vertically.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per fixation, in viewing order, with the columns
        ``fixation_id``, ``avg_gaze_x``, ``avg_gaze_y`` and ``duration``.
    path : str or path-like
        Destination handed to ``Figure.savefig``.

    Raises
    ------
    KeyError
        If one of the required columns is missing. This is raised before any
        figure is created.
    """
    # Marker style per fixation-duration bucket; ranges are half-open [low, high).
    fixation_levels = {
        'Level 1': {'duration_range': (110, 250), 'marker': 'o', 'color': 'red', 'size': 6},
        'Level 2': {'duration_range': (250, 400), 'marker': '*', 'color': 'purple', 'size': 12},
        'Level 3': {'duration_range': (400, 550), 'marker': 'p', 'color': 'yellow', 'size': 18},
        'Level 4': {'duration_range': (550, np.inf), 'marker': 'x', 'color': 'white', 'size': 24}
    }

    # Pull the columns out once: this avoids building a Series per row inside
    # the loops and fails on a missing column before a figure is allocated.
    fixation_ids = df['fixation_id'].to_numpy()
    xs = df['avg_gaze_x'].to_numpy()
    ys = df['avg_gaze_y'].to_numpy()
    durations = df['duration'].to_numpy()
    n_fixations = len(df)

    fig, ax = plt.subplots(figsize=(25.6, 14.4), dpi=100, facecolor='black')
    try:
        ax.axis('off')  # Hide axis lines, ticks and labels

        # Saccades: only between fixations with consecutive ids.
        # NOTE(review): presumably a gap means the gaze left this paragraph in
        # between (the caller filters rows by paragraph) - confirm.
        for i in range(n_fixations - 1):
            if fixation_ids[i + 1] - fixation_ids[i] == 1:
                ax.plot(xs[i:i + 2], ys[i:i + 2], color=plt.cm.winter(i / n_fixations))

        # Fixations: one marker each, drawn in row order so later ones stay on top
        for x, y, duration in zip(xs, ys, durations):
            for properties in fixation_levels.values():
                low, high = properties['duration_range']
                if low <= duration < high:
                    ax.plot(x, y, marker=properties['marker'], color=properties['color'],
                            markersize=properties['size'])
                    break  # buckets do not overlap, so the first match is the only one

        ax.set_aspect('equal', adjustable='box')
        # NOTE(review): the title uses the default text colour, so it is probably not
        # readable on the black canvas; kept so the saved image is unchanged - confirm.
        ax.set_title('Scanpath')
        ax.set_xlim(0, 2560)
        ax.set_ylim(0, 1440)

        # facecolor must be repeated here so the saved file keeps the black background
        fig.savefig(path, facecolor='black', bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure; this function is called once per paragraph.
        plt.close(fig)

In [None]:
# Participants whose recordings are loaded, listed per ID prefix
# (A02 and A05 are not part of the selection).
group_a_ids = ["A01", "A03", "A04", "A06", "A07", "A08", "A09", "A10", "A11", "A12", "A13"]
group_b_ids = ["B01", "B02", "B03", "B04", "B05", "B06", "B07",
               "B08", "B09", "B10", "B11", "B12", "B13"]
users_ids = group_a_ids + group_b_ids

# Load the gaze recordings of the reading task, including the g-REL corpus.
dataloader = SciBot_DataLoader(
    data_dir="../Data/Eye_Tracking_Data",
    include_users=users_ids,
    gaze_data=True,
    reading_task=True,
    rating_task=False,
    training_data=False,
    gREL=True,
)

In [None]:
# Collapse every full gaze recording into one row per fixation
# (fixation_id, timestamp, avg_gaze_x, avg_gaze_y, duration), store it as CSV
# and render the matching scanpath image.
grel_output_list = []
corpus = "g-rel"
# Create the output folder up front so to_csv / savefig do not fail on a fresh checkout.
Path("../Data/CNN_Data/event_data").mkdir(parents=True, exist_ok=True)
for user in dataloader.grel_reading.keys():
    for stimulus in dataloader.grel_reading[user]:
        trial = dataloader.grel_reading[user][stimulus]
        grel_df = trial["dataframe"]
        event_df = grel_df.groupby("fixation_id").agg(
                        timestamp=("timestamp", "min"),  # Fixation onset: earliest sample
                        max_timestamp=("timestamp", "max"),  # Latest sample of the fixation
                        avg_gaze_x=("gaze_x", "mean"),  # Calculate the average gaze_x
                        avg_gaze_y=("gaze_y", "mean")  # Calculate the average gaze_y
                    )
        # Duration = span between the first and the last sample of the fixation
        event_df["duration"] = event_df["max_timestamp"] - event_df["timestamp"]
        pr = trial["perceived_relevance"][0]
        sr = trial["system_relevance"][0]
        # max_timestamp was only needed to derive the duration
        event_df = event_df.drop(columns=["max_timestamp"])
        # Convert to milliseconds (assumes the raw timestamps are in seconds - TODO confirm)
        event_df["timestamp"] *= 1000
        event_df["duration"] *= 1000
        csv_path = f"../Data/CNN_Data/event_data/{user}_{stimulus}_{pr}_{sr}.csv"
        image_path = f"../Data/CNN_Data/event_data/{user}_{stimulus}_{pr}_{sr}.png"
        event_df.to_csv(csv_path)  # fixation_id is the index here, so it becomes the first column
        generate_scanpath_grel(event_df, image_path)
        gREL_relevance = trial["g-rel_relevance"][0]  # Needed to extract topical data
        grel_output_list.append({"user_id": user, "corpus": corpus, "stimulus": stimulus,
                                 "label": pr, "system_label": sr,
                                 "gREL_label": gREL_relevance, "img_path": image_path})
    print(f"{user} finished")

# One row per rendered scanpath, with its labels and image path
grel_output_df = pd.DataFrame(grel_output_list)
grel_output_df.to_csv("../Data/CNN_Data/gREL_event_data.csv", index=False)

In [None]:
# Google NQ stimuli consist of several paragraphs: build one fixation table,
# one CSV and one scanpath image per (user, stimulus, paragraph).
google_output_list = []
corpus = "nq"
# Create the output folder up front so to_csv / savefig do not fail on a fresh checkout.
Path("../Data/CNN_Data/event_data").mkdir(parents=True, exist_ok=True)
for user in dataloader.google_nq_reading.keys():
    for stimulus in dataloader.google_nq_reading[user]:
        trial = dataloader.google_nq_reading[user][stimulus]
        google_df = trial["dataframe"]
        for paragraph in range(trial["num_paragraphs"]):
            # Keep only the gaze samples recorded on this paragraph
            paragraph_df = google_df[google_df["paragraph_id"] == paragraph]
            # as_index=False keeps fixation_id as a column; generate_scanpath_nq reads it
            event_df = paragraph_df.groupby("fixation_id", as_index=False).agg(
                    timestamp=("timestamp", "min"),  # Fixation onset: earliest sample
                    max_timestamp=("timestamp", "max"),  # Latest sample of the fixation
                    avg_gaze_x=("gaze_x", "mean"),  # Calculate the average gaze_x
                    avg_gaze_y=("gaze_y", "mean")  # Calculate the average gaze_y
                )
            # Duration = span between the first and the last sample of the fixation
            event_df["duration"] = event_df["max_timestamp"] - event_df["timestamp"]
            pr = trial["perceived_relevance"][paragraph]
            sr = trial["system_relevance"][paragraph]
            # max_timestamp was only needed to derive the duration
            event_df = event_df.drop(columns=["max_timestamp"])
            # Convert to milliseconds (assumes the raw timestamps are in seconds - TODO confirm)
            event_df["timestamp"] *= 1000
            event_df["duration"] *= 1000
            csv_path = f"../Data/CNN_Data/event_data/{user}_{stimulus}_{paragraph}_{pr}_{sr}.csv"
            image_path = f"../Data/CNN_Data/event_data/{user}_{stimulus}_{paragraph}_{pr}_{sr}.png"
            # NOTE(review): with as_index=False the default index is written as an extra
            # unnamed first column, unlike the g-REL CSVs where fixation_id is the index.
            # Left unchanged in case downstream readers rely on it - confirm.
            event_df.to_csv(csv_path)
            generate_scanpath_nq(event_df, image_path)
            google_output_list.append({"user_id": user, "corpus": corpus, "stimulus": stimulus,
                                       "paragraph_id": paragraph, "label": pr,
                                       "system_label": sr, "img_path": image_path})
    print(f"{user} finished")

# One row per rendered scanpath, with its labels and image path
google_output_df = pd.DataFrame(google_output_list)
google_output_df.to_csv("../Data/CNN_Data/GoogleNQ_event_data.csv", index=False)