In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from utils import (
    DEGREES_PER_PIXEL,
    TIMESTAMP_IDENT,
    X_PIXELS,
    Y_PIXELS,
    Events,
    extract_gaze_data_between_timestamps_proper,
    get_participant_dominant_eye,
)
import json


from velocityThreshold import detect_fix_ivt, find_sacc_from_fix

# from scipy.stats import ttest_ind

EYE_TRACKER_FOLDER = "eye_tracker_data/"
GAZE_DATA = []
GAZE_DATA_BOOK = []
GAZE_DATA_PAGE = []

# Velocity threshold handed to detect_fix_ivt as `sacvel`.
# NOTE(review): presumably degrees/second, because x/y are scaled by
# DEGREES_PER_PIXEL and `ts` is divided by 1_000_000 before detection — confirm
# against velocityThreshold.detect_fix_ivt.
SACCADIC_THRESHOLD = 80

# participants that were told that they can change the settings beforehand
participant_ids = [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
subquery = Events.select().where(Events.participant_id.in_(participant_ids))
# participant id -> list with the mean saccade "dxy" of each book, in reading order
avg_saccades = {_id: [] for _id in participant_ids}

# The participant metadata does not change while looping, so read it once and
# let the context manager close the handle (previously it was re-opened for
# every book and never closed).
with open("participants.json", "r") as participants_file:
    PARTICIPANT_INFO = json.load(participants_file)

# NOTE: every row printed below actually holds five values:
# participant id, mean of first book, mean of second book, std of the two
# means, and (first - second). The summary-table cell further down uses the
# opposite sign (second - first) for its "Difference" column.
print("Mean , std , difference")

for participant in participant_ids:
    # Order explicitly by time: index 0/1 of `average_length` is later labelled
    # "First text"/"Second text", so the row order must not be left to the DB.
    books = subquery.where(
        (Events.event == "OPEN_BOOK")
        & (Events.new_value != "Chasing Sunsets - ")
        & (Events.participant_id == participant)
    ).order_by(Events.time)

    average_length = []
    for book in books:
        START_TIME_BOOK = book.time

        # First CLOSE_BOOK of *this* participant after the book was opened.
        # The lookup has to be scoped to the participant (timestamps of other
        # participants may even use a different unit, see LOW_RES below) and
        # ordered, otherwise .get() may return an unrelated event.
        book_end = (
            subquery.where(
                (Events.event == "CLOSE_BOOK")
                & (Events.participant_id == participant)
                & (Events.time > START_TIME_BOOK)
            )
            .order_by(Events.time)
            .get()
        )
        END_TIME_BOOK = book_end.time

        # Events of this participant up to the CLOSE_BOOK event.
        # NOTE(review): this query is built but not consumed by the analysis
        # below; the whole gaze file is used instead — confirm this is intended.
        events_book = subquery.where(
            (Events.participant_id == participant) & (Events.time <= END_TIME_BOOK)
        )

        # Read from participants.json whether or not the participant has low resolution data
        # These we should not divide the timestamp with 1000
        LOW_RES = PARTICIPANT_INFO[f"{book.participant_id}"]["low_resolution"]
        start_seconds = START_TIME_BOOK if LOW_RES else START_TIME_BOOK / 1000
        formatted_time = datetime.fromtimestamp(start_seconds).strftime(
            "%Y-%m-%d_%H-%M-%S"
        )
        GAZE_FILE = f"{EYE_TRACKER_FOLDER}[{book.participant_id}]-{formatted_time}.json"
        # `with` guarantees the file is closed even if json.load raises.
        with open(GAZE_FILE, "r") as gaze_file:
            GAZE_DATA_BOOK = json.load(gaze_file)

        DOMINANT_EYE = get_participant_dominant_eye(participant)
        validity_key = f"{DOMINANT_EYE}_gaze_point_validity"
        point_key = f"{DOMINANT_EYE}_gaze_point_on_display_area"

        timestamps = []
        x = []
        y = []
        for packet in GAZE_DATA_BOOK["data"]:
            # Packets whose validity flag is 0 are skipped.
            if packet[validity_key] == 0:
                continue
            # Gaze point components are scaled by the display size in pixels.
            x.append(packet[point_key][0] * X_PIXELS)
            y.append(packet[point_key][1] * Y_PIXELS)
            timestamps.append(packet[TIMESTAMP_IDENT])

        df = pd.DataFrame({"x": x, "y": y, "ts": timestamps})
        df = df.sort_values(by="ts")
        df = df.reset_index(drop=True)

        # Pixel coordinates -> DEGREES_PER_PIXEL-scaled coordinates.
        df["x"] = df["x"] * DEGREES_PER_PIXEL
        df["y"] = df["y"] * DEGREES_PER_PIXEL
        # NOTE(review): presumably microseconds -> seconds; confirm the unit of
        # TIMESTAMP_IDENT.
        df["ts"] = df["ts"] / 1_000_000

        # Detect fixations, derive the saccades between them and keep the mean
        # of their "dxy" column for this book.
        fixations, v, labels = detect_fix_ivt(df, sacvel=SACCADIC_THRESHOLD)
        saccades_part = find_sacc_from_fix(fixations)
        average_length.append(np.mean(saccades_part["dxy"]))

    # The summary line compares exactly two books; guard against participants
    # with fewer recordings instead of failing with an IndexError.
    if len(average_length) >= 2:
        print(
            f"{participant} {average_length[0]} {average_length[1]} {np.std(average_length)} {average_length[0] - average_length[1]}"
        )
    else:
        print(
            f"{participant}: expected 2 books but found {len(average_length)}; no comparison printed"
        )
    avg_saccades[participant] = average_length

# print the mean saccade length for each participant
# display(df.mean())
# df.mean().to_latex("averages.tex")

    # Do a t-test to see if the mean saccade length is significantly different between the two books
    # # Perform t-test
    # t_statistic, p_value = ttest_ind(saccades[0]['len'], saccades[0]['len'])

    # # Print the results
    # print(f"T-Statistic: {t_statistic}")
    # print(f"P-Value: {p_value}")

Mean , std , difference


  rho = cov[0,1] / (sx*sy)


9 4.103903357795357 4.1064080575625175 0.0012523498835803615 -0.002504699767160723
10 6.760034706173849 4.206363633560318 1.2768355363067658 2.5536710726135317
11 5.6218083717600065 4.334914274757255 0.6434470485013759 1.2868940970027518
12 7.686126479475437 7.6987162243174465 0.0062948724210047935 -0.012589744842009587
13 4.575738950524601 3.284641210415162 0.6455488700547194 1.2910977401094388
14 5.988474032434675 4.983667712802527 0.5024031598160739 1.0048063196321477
15 5.108722816970324 4.290359129213496 0.4091818438784136 0.8183636877568272
16 5.117125455894053 4.710085914416548 0.20351977073875238 0.40703954147750476
17 3.9121230997944663 3.552381363847007 0.17987086797372953 0.35974173594745906
18 5.561471458347538 4.730189675483575 0.41564089143198135 0.8312817828639627
19 3.5303400064777106 2.92386153858983 0.30323923394394026 0.6064784678878805
20 5.448268197766597 4.803501462918625 0.3223833674239862 0.6447667348479724
21 3.4516672053431487 3.276487697359725 0.0875897539917

In [6]:
# Participants as rows, one column per text: avg_saccades maps each participant
# id to [mean for first text, mean for second text].
saccade_means = pd.DataFrame(avg_saccades).T.rename(
    columns={0: "First text", 1: "Second text"}
)

# Per-participant average of both texts, taken from the UNROUNDED means so the
# value is rounded only once (previously it was computed from already rounded
# numbers and rounded again).
participant_average = saccade_means[["First text", "Second text"]].mean(axis=1)

# add a column to show the differences between book 1 and book 2
# (sign convention: second - first)
saccade_means["Difference"] = (
    saccade_means["Second text"] - saccade_means["First text"]
)

# Table 1: per-participant means and difference, plus a final row with the
# column averages, rounded to 3 decimal places.
df = saccade_means.copy()
df.loc["Mean"] = df.mean()
df = df.round(3)

display(df)
df.to_latex("differences.tex")

# Table 2: one "Average" column per participant (no "Mean" row).
new_df = participant_average.round(3).to_frame("Average")
display(new_df)

new_df.to_latex("averages.tex")

Unnamed: 0,First text,Second text,Difference
9,4.104,4.106,0.003
10,6.76,4.206,-2.554
11,5.622,4.335,-1.287
12,7.686,7.699,0.013
13,4.576,3.285,-1.291
14,5.988,4.984,-1.005
15,5.109,4.29,-0.818
16,5.117,4.71,-0.407
17,3.912,3.552,-0.36
18,5.561,4.73,-0.831


Unnamed: 0,Average
9,4.105
10,5.483
11,4.978
12,7.692
13,3.93
14,5.486
15,4.7
16,4.914
17,3.732
18,5.146
