In [9]:
import pandas as pd
import numpy as np

# Fixations

In [8]:
def update_format_fix(df):
    """Convert a raw fixation table into the fixation-report column layout.

    The frame is renamed to the report column names, gains a ``position``
    column filled entirely with NaN, and is then cut down to the seven
    report columns in a fixed order. Any other input column is dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``fix_id``, ``sp_fix_dur``, ``sp_fix_pos``, ``repeat``,
        ``unique_paragraph_id`` and ``text_spacing_version``.

    Returns
    -------
    pandas.DataFrame
        A new frame; the caller's frame is left untouched.

    Raises
    ------
    KeyError
        If one of the report columns is absent after renaming.
    """
    column_aliases = {
        "fix_id": "CURRENT_FIX_INDEX",
        "sp_fix_dur": "CURRENT_FIX_DURATION",
        "sp_fix_pos": "CURRENT_FIX_INTEREST_AREA_INDEX",
        "repeat": "repeats",
        # Identity entries: these names are already in the target format.
        "unique_paragraph_id": "unique_paragraph_id",
        "text_spacing_version": "text_spacing_version"
    }
    report_columns = [
        "CURRENT_FIX_INDEX",
        "CURRENT_FIX_DURATION",
        "position",
        "CURRENT_FIX_INTEREST_AREA_INDEX",
        "unique_paragraph_id",
        "text_spacing_version",
        "repeats",
    ]

    # rename() and assign() both hand back new frames, so the input is never
    # modified; "position" receives one NaN per row.
    with_position = df.rename(columns=column_aliases).assign(
        position=[np.nan] * len(df)
    )

    # Selecting by list both drops the extra columns and fixes the order.
    return with_position[report_columns]

In [11]:
# Reformat the Eyettention fixation output of folds 0-9 and write each result
# under results/fixationReports with the same file name.
for i in range(10):
    fixation_source_csv = f"results/Eyettention/full_eyettention_output_fold_{i}.csv"
    fixation_report_csv = f"results/fixationReports/full_eyettention_output_fold_{i}.csv"

    # Read the raw fold and convert it to the fixation-report layout
    df = update_format_fix(pd.read_csv(fixation_source_csv))

    # index=False keeps the pandas row index out of the saved file
    df.to_csv(fixation_report_csv, index=False)

    print(f"Finished updating fold {i}")

Finished updating fold 0
Finished updating fold 1
Finished updating fold 2
Finished updating fold 3
Finished updating fold 4
Finished updating fold 5
Finished updating fold 6
Finished updating fold 7
Finished updating fold 8
Finished updating fold 9


# Interest areas (IA)

In [22]:
def update_format_ia(df):
    """Convert a raw IA table into the IA-report column layout.

    Steps, in order:

    1. Rename the short eye-measure columns to their report names.
    2. Add ``total_skip`` as ``1 - probability_of_fixating``.
    3. Add ``text_index``: a running count of rows whose integer ``IA_ID``
       is smaller than the previous row's. The first row always gets 0.
       (Presumably a drop in ``IA_ID`` marks the start of a new text --
       confirm against the data.)
    4. Keep only the report columns, in a fixed order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide every key of the renaming table below plus ``GP``,
        ``FirstPassGD``, ``FirstPassFFD``, ``IA_ID``, ``unique_paragraph_id``
        and ``text_spacing_version``. ``IA_ID`` must be castable to int.

    Returns
    -------
    pandas.DataFrame
        A new frame; the caller's frame is left untouched.

    Raises
    ------
    KeyError
        If a required column is missing.
    """
    measure_names = {
        "SFD": "IA_SINGLE_FIXATION_DURATION",
        "FFD": "IA_FIRST_FIXATION_DURATION",
        "GD": "IA_FIRST_RUN_DWELL_TIME",
        "TT": "IA_DWELL_TIME",
        "PrF": "probability_of_fixating",
        "Pr1": "probability_of_fixating_once",
        "Pr2": "probability_of_fixating_two_or_more_times",
        "NFixations": "IA_FIXATION_COUNT",
        "NRegIn": "IA_REGRESSION_IN_COUNT",
        "NRegOutFull": "IA_REGRESSION_OUT_FULL_COUNT",
        "NRegOut": "IA_REGRESSION_OUT_COUNT",
        "FirstFixProg": "IA_FIRST_FIX_PROGRESSIVE",
        "PrS": "IA_SKIP",
    }
    report_columns = [
        "IA_SINGLE_FIXATION_DURATION",
        "IA_FIRST_FIXATION_DURATION",
        "IA_FIRST_RUN_DWELL_TIME",
        "IA_DWELL_TIME",
        "GP",
        "probability_of_fixating",
        "probability_of_fixating_once",
        "probability_of_fixating_two_or_more_times",
        "IA_SKIP",
        "IA_REGRESSION_IN_COUNT",
        "IA_REGRESSION_OUT_COUNT",
        "IA_REGRESSION_OUT_FULL_COUNT",
        "FirstPassGD",
        "FirstPassFFD",
        "IA_FIRST_FIX_PROGRESSIVE",
        "IA_FIXATION_COUNT",
        "IA_ID",
        "text_index",
        "unique_paragraph_id",
        "text_spacing_version",
        "total_skip",
    ]

    # rename() returns a new frame, so the additions below never reach the
    # caller's object.
    renamed = df.rename(columns=measure_names)

    renamed["total_skip"] = 1 - renamed["probability_of_fixating"]

    # Cast once and compare each id with its predecessor. The first row is
    # compared against NaN, which evaluates to False, so counting starts at 0.
    ia_ids = renamed["IA_ID"].astype(int)
    id_dropped = ia_ids < ia_ids.shift(1)
    renamed["text_index"] = id_dropped.cumsum()

    return renamed[report_columns]


In [23]:
# Reformat the Eyettention IA output of folds 0-9 and write each result under
# results/iaReports with the same file name.
for i in range(10):
    ia_source_csv = f"ln_shared_data/Eyettention/iaReports/full_eyettention_output_fold_{i}.csv"
    ia_report_csv = f"results/iaReports/full_eyettention_output_fold_{i}.csv"

    # Read the raw fold and convert it to the IA-report layout
    df = update_format_ia(pd.read_csv(ia_source_csv))

    # index=False keeps the pandas row index out of the saved file
    df.to_csv(ia_report_csv, index=False)

    print(f"Finished updating fold {i}")

Finished updating fold 0
Finished updating fold 1
Finished updating fold 2
Finished updating fold 3
Finished updating fold 4
Finished updating fold 5
Finished updating fold 6
Finished updating fold 7
Finished updating fold 8
Finished updating fold 9
