In [1]:
import pickle
import numpy as np
import pandas as pd
from collections import OrderedDict
import re
import os
# os.chdir(r'C:\Users\dexin\PycharmProjects\eeg\PhysioLabXR')

In [2]:
from physiolabxr.scripting.illumiRead.utils.VarjoEyeTrackingUtils.VarjoGazeUtils import VarjoGazeData
from physiolabxr.scripting.illumiRead.utils.gaze_utils.general import GazeFilterFixationDetectionIVT, GazeType
from physiolabxr.scripting.illumiRead.illumiReadSwype.illumiReadSwypeUtils import illumiReadSwypeUserInput

In [6]:
def parse_tuple(val):
    """
    Convert a string tuple like '(-0.5332503 0.01100001)' to a Python tuple.
    """
    return tuple(map(float, val.strip('()').split()))

def parse_letter_locations(gaze_data_path):
    """
    Parse key ground truth locations from the CSV.
    """
    letters = []
    key_ground_truth_local = []

    with open(gaze_data_path, 'r') as file:
        header = file.readline()  # Skip header row
        for line in file:
            if not line.strip() or line.startswith('Key'):  # Skip invalid lines
                continue
            if re.match(r'^[a-zA-Z]', line):  # Match valid letter rows
                parts = line.strip().split(',')
                letters.append(parts[0])  # Letter
                key_ground_truth_local.append(parse_tuple(parts[3]))  # KeyGroundTruthLocal column

    # Group by letter and calculate mean location
    df = pd.DataFrame({'Letter': letters, 'KeyGroundTruthLocal': key_ground_truth_local})
    grouped = df.groupby('Letter')['KeyGroundTruthLocal']
    letter_locations = OrderedDict()
    for letter, group in grouped:
        ground_truth_array = np.array(list(group))
        letter_locations[letter] = np.mean(ground_truth_array, axis=0)
    return letter_locations

def map_fixation_to_letters(fixation_points, user_input_data, letter_locations, radius=0.05):
    """
    Map fixation points to possible letters using user input hit points and letter locations.
    """
    if not fixation_points:
        return []

    # Calculate fixation centroid
    fixation_array = np.array([[point[1][0], point[1][1]] for point in fixation_points])
    centroid = np.mean(fixation_array, axis=0)

    # Filter user input data within fixation timestamps
    fixation_start = fixation_points[0][0]
    fixation_end = fixation_points[-1][0]
    user_input_in_fixation = [
        illumiReadSwypeUserInput(data, timestamp)
        for data, timestamp in zip(user_input_data[0].T, user_input_data[1])
        if fixation_start <= timestamp <= fixation_end
    ]

    # Get keyboard hit points from user input
    keyboard_hit_points = [
        np.array(input_data.keyboard_background_hit_point_local[:2])
        for input_data in user_input_in_fixation
        if not np.array_equal(input_data.keyboard_background_hit_point_local, [1, 1, 1])
    ]

    # Map fixation centroid to letters
    possible_letters = []
    for hit_point in keyboard_hit_points:
        for letter, location in letter_locations.items():
            distance = np.linalg.norm(location - hit_point)
            if distance <= radius:
                possible_letters.append(letter)
    return list(set(possible_letters))


In [7]:
# Load Varjo Eye and User Input Data
pickle_file_path = r'practice.p'  # Path to pickle file containing LSL data
with open(pickle_file_path, 'rb') as file:
    data = pickle.load(file)

# Extract Gaze and User Input Data
gaze_channels = data['VarjoEyeTrackingLSL'][0]  # Shape: (39, N)
gaze_timestamps = data['VarjoEyeTrackingLSL'][1]  # Shape: (N,)
user_input_data = data['illumiReadSwypeUserInputLSL']  # [0]: data (11, M), [1]: timestamps (M,)


In [8]:
print("Keys in the pickle file:", data.keys())

# Print shapes or basic info about each data stream
print("Gaze Data - Channels Shape:", data['VarjoEyeTrackingLSL'][0].shape)
print("Gaze Data - Timestamps Shape:", data['VarjoEyeTrackingLSL'][1].shape)

print("User Input Data - Shape:", data['illumiReadSwypeUserInputLSL'][0].shape)
print("User Input Data - Timestamps Shape:", data['illumiReadSwypeUserInputLSL'][1].shape)


Keys in the pickle file: dict_keys(['DSI24', 'illumiReadSwypeEventMarkerLSL', 'illumiReadSwypeUserInputLSL', 'VarjoEyeTrackingLSL'])
Gaze Data - Channels Shape: (39, 38891)
Gaze Data - Timestamps Shape: (38891,)
User Input Data - Shape: (11, 15844)
User Input Data - Timestamps Shape: (15844,)


In [9]:
# Load ActionInfo CSV and Determine Sweyepe Start Time
action_info_path = r'ActionInfo.csv'  # Path to ActionInfo.csv
action_info = pd.read_csv(action_info_path)

sweyepe_start_time = action_info.loc[action_info['conditionType'] == 'Sweyepe', 'absoluteTime'].min()
if pd.isna(sweyepe_start_time):
    raise ValueError("No sweyepe mode found in ActionInfo.csv")

# Path to Gaze Data CSV for Letter Locations
gaze_data_csv_path = r'GazeData.csv'  # Path to gaze data file
letter_locations = parse_letter_locations(gaze_data_csv_path)


In [10]:
# Initialize IVT Filter for Fixation Detection
ivt_filter = GazeFilterFixationDetectionIVT(angular_speed_threshold_degree=100)
gaze_data_sequence = []
fixation_points_buffer = []

# Filter and Sort Streams by Timestamp (Skip Real-Time Processing)
filtered_gaze_stream = [
    ('gaze', i, timestamp)
    for i, timestamp in enumerate(gaze_timestamps)
    if timestamp >= sweyepe_start_time
]

filtered_user_input_stream = [
    ('user_input', i, timestamp)
    for i, timestamp in enumerate(user_input_data[1])
    if timestamp >= sweyepe_start_time
]

# Merge and sort all data streams
all_data_stream = sorted(filtered_gaze_stream + filtered_user_input_stream, key=lambda x: x[2])

all_data_stream


[('gaze', 1267, 61223.281231122),
 ('gaze', 1268, 61223.286232472),
 ('gaze', 1269, 61223.291234142),
 ('gaze', 1270, 61223.296235567),
 ('user_input', 425, 61223.2968083),
 ('gaze', 1271, 61223.301237323),
 ('gaze', 1272, 61223.306239405996),
 ('gaze', 1273, 61223.311241318),
 ('user_input', 426, 61223.3138396),
 ('gaze', 1274, 61223.316247035),
 ('gaze', 1275, 61223.321248872),
 ('user_input', 427, 61223.325349),
 ('gaze', 1276, 61223.326251077),
 ('gaze', 1277, 61223.331252874996),
 ('user_input', 428, 61223.3355273),
 ('gaze', 1278, 61223.336254739),
 ('gaze', 1279, 61223.341257261),
 ('gaze', 1280, 61223.346258241),
 ('user_input', 429, 61223.348262),
 ('gaze', 1281, 61223.351260281),
 ('gaze', 1282, 61223.35626128301),
 ('user_input', 430, 61223.3577953),
 ('gaze', 1283, 61223.361262752005),
 ('gaze', 1284, 61223.366263914),
 ('user_input', 431, 61223.369073),
 ('gaze', 1285, 61223.3712658),
 ('gaze', 1286, 61223.376266888),
 ('user_input', 432, 61223.379709),
 ('gaze', 1287, 612

In [11]:
# Initialize list to store fixation results: timestamp, fixation points, and possible letters
fixation_results = []  # To store (fixation timestamps, fixation points, possible letters)

# Process Data
for data_type, index, timestamp in all_data_stream:
    if data_type == 'gaze':
        # Process Gaze Data
        gaze_sample = gaze_channels[:, index]
        gaze_data = VarjoGazeData()
        gaze_data.construct_gaze_data_varjo(gaze_sample, timestamp)

        processed_gaze_data = ivt_filter.process_sample(gaze_data)
        gaze_data_sequence.append(processed_gaze_data)

        # Detect Fixation
        if processed_gaze_data.gaze_type == GazeType.FIXATION:
            fixation_points_buffer.append(
                (processed_gaze_data.timestamp, processed_gaze_data.get_combined_eye_gaze_direction()[:2])
            )
        elif fixation_points_buffer:
            # Map Fixation to Letters
            possible_letters = map_fixation_to_letters(
                fixation_points_buffer, user_input_data, letter_locations, radius=0.2
            )
            # Store fixation timestamps, fixation points buffer, and possible letters
            fixation_timestamps = [fp[0] for fp in fixation_points_buffer]  # Extract just the timestamps
            fixation_results.append((fixation_timestamps, fixation_points_buffer.copy(), possible_letters))

            # Clear the fixation points buffer for next detection
            fixation_points_buffer = []

# Displaying a sample of the collected fixation results for inspection
fixation_results[:5]  # Display the first 5 results for review


[([61223.286232472,
   61223.291234142,
   61223.296235567,
   61223.301237323,
   61223.306239405996],
  [(61223.286232472, array([0.07456814, 0.31223035])),
   (61223.291234142, array([0.07410603, 0.30791759])),
   (61223.296235567, array([0.07423705, 0.30150664])),
   (61223.301237323, array([0.07456872, 0.29493171])),
   (61223.306239405996, array([0.07514903, 0.28750139]))],
  []),
 ([61223.321248872,
   61223.326251077,
   61223.331252874996,
   61223.336254739,
   61223.341257261,
   61223.346258241,
   61223.351260281],
  [(61223.321248872, array([0.0775492 , 0.26199815])),
   (61223.326251077, array([0.07878405, 0.25728336])),
   (61223.331252874996, array([0.08071782, 0.25708517])),
   (61223.336254739, array([0.08195324, 0.25909412])),
   (61223.341257261, array([0.0826102 , 0.26163548])),
   (61223.346258241, array([0.08152012, 0.25941139])),
   (61223.351260281, array([0.08010156, 0.25569585]))],
  []),
 ([61223.386270259,
   61223.391272140005,
   61223.396274349,
   6122

In [12]:
# Count how many entries in fixation_results_example have at least one possible letter
count_entries_with_letters = sum(1 for _, _, possible_letters in fixation_results if possible_letters)

count_entries_with_letters

214

In [13]:
print(f"Total data stream entries: {len(all_data_stream)}")

Total data stream entries: 53043


In [14]:
# Let's dissect one instance from `fixation_results` that contains possible letters to see its structure

# Find the first instance in `fixation_results` that has possible letters
example_with_letters = next((fixation_data for fixation_data in fixation_results if fixation_data[2]), None)

# Display the instance for inspection
example_with_letters


([61224.126537138,
  61224.131538101,
  61224.136539695006,
  61224.14154152,
  61224.146542975,
  61224.151545265,
  61224.156550261,
  61224.16155553,
  61224.166557212004,
  61224.171558756,
  61224.176560859,
  61224.181561523,
  61224.186562548,
  61224.191563832,
  61224.196564529,
  61224.201565833995,
  61224.206566571,
  61224.211567939004,
  61224.216569536,
  61224.221570762995,
  61224.226572822,
  61224.231574311,
  61224.236576426,
  61224.241581569,
  61224.246586958005,
  61224.251588358,
  61224.25658972,
  61224.261591343,
  61224.266592325,
  61224.271593769,
  61224.276594273004,
  61224.281595669,
  61224.286596198996,
  61224.291597262,
  61224.296598957,
  61224.301600081,
  61224.306601835,
  61224.31160332,
  61224.316605136,
  61224.321607382,
  61224.326612346995,
  61224.331617456,
  61224.336619779],
 [(61224.126537138, array([-0.04917125, -0.17993778])),
  (61224.131538101, array([-0.04754771, -0.18227983])),
  (61224.136539695006, array([-0.04525286, -0.1

In [15]:
# Update ActionInfo dataframe with possible letters for all timestamps that fall within the fixation timestamp range
# Adding a new column to the ActionInfo dataframe with the possible letters mapped from fixation events

# Initialize an empty list to hold possible letters corresponding to each action in the ActionInfo dataframe
action_info['PossibleLetters'] = None
# Iterating over the gaze processing step again to collect possible letters for each fixation
fixation_points_buffer = []  # Re-initialize fixation buffer for the mapping process

# Create a list to accumulate the fixation points and possible letters for updating the dataframe later
fixation_letter_mapping = []

# Process Data and collect possible letters
for data_type, index, timestamp in all_data_stream:
    if data_type == 'gaze':
        # Process Gaze Data
        gaze_sample = gaze_channels[:, index]
        gaze_data = VarjoGazeData()
        gaze_data.construct_gaze_data_varjo(gaze_sample, timestamp)

        processed_gaze_data = ivt_filter.process_sample(gaze_data)
        gaze_data_sequence.append(processed_gaze_data)

        # Detect Fixation
        if processed_gaze_data.gaze_type == GazeType.FIXATION:
            fixation_points_buffer.append(
                (processed_gaze_data.timestamp, processed_gaze_data.get_combined_eye_gaze_direction()[:2])
            )
        elif fixation_points_buffer:
            # Map Fixation to Letters when fixation ends
            possible_letters = map_fixation_to_letters(
                fixation_points_buffer, user_input_data, letter_locations, radius=0.2
            )
            fixation_points_buffer_data = fixation_points_buffer.copy()
            fixation_points_buffer = []

            # Add the fixation points and possible letters to the mapping list
            fixation_letter_mapping.append((fixation_points_buffer_data, possible_letters))

# Now we will create a mapping of fixation timestamps to possible letters and update the ActionInfo dataframe
# We want to update each row in the ActionInfo with possible letters if it falls within fixation points
for fixation_points, possible_letters in fixation_letter_mapping:
    if possible_letters:
        # Loop through all fixation points in the buffer and update ActionInfo
        for timestamp, _ in fixation_points:
            # Find the closest row in ActionInfo with matching timestamp
            closest_row = action_info.iloc[(action_info['absoluteTime'] - timestamp).abs().argsort()[:1]]
            row_index = closest_row.index[0]
            # Update PossibleLetters column with the list of possible letters
            if action_info.at[row_index, 'PossibleLetters'] is None:
                action_info.at[row_index, 'PossibleLetters'] = possible_letters
            else:
                # If there are already possible letters, merge them to avoid overwriting
                existing_letters = action_info.at[row_index, 'PossibleLetters']
                action_info.at[row_index, 'PossibleLetters'] = list(set(existing_letters + possible_letters))

# Display the updated ActionInfo with the new PossibleLetters column
updated_action_info_head = action_info.head(10)

updated_action_info_head

# Iterate over each fixation event with possible letters
for fixation_timestamps, fixation_points, possible_letters in fixation_results:
    if possible_letters:  # Only consider non-empty possible letters
        # Define the start and end of the fixation period
        start_time = min(fixation_timestamps)
        end_time = max(fixation_timestamps)

        # Find all rows in ActionInfo where the absoluteTime falls within the fixation period
        rows_in_range = action_info[(action_info['absoluteTime'] >= start_time) &
                                        (action_info['absoluteTime'] <= end_time)]

        # Update the PossibleLetters column for each matching row
        for row_index in rows_in_range.index:
            if action_info.at[row_index, 'PossibleLetters'] is None:
                action_info.at[row_index, 'PossibleLetters'] = possible_letters
            else:
                # Merge letters if there are already existing possible letters
                existing_letters = action_info.at[row_index, 'PossibleLetters']
                action_info.at[row_index, 'PossibleLetters'] = list(set(existing_letters + possible_letters))

# Display the updated ActionInfo dataframe with the PossibleLetters column now populated where applicable
updated_action_info_head = action_info.head(10)

updated_action_info_head


Unnamed: 0,absoluteTime,trialTime,deltaTime,trialIndex,eventType,keyboardValue,xKeyHitLocal,yKeyHitLocal,candidate1,candidate2,candidate3,conditionType,currentText,targetText,eyeTrackingStatus,PossibleLetters
0,61147.254532,0.0,0.02,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
1,61149.665579,0.02,0.02,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
2,61149.721329,0.078627,0.058627,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
3,61149.731074,0.088935,0.010307,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
4,61149.742078,0.099923,0.010989,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
5,61149.752971,0.110655,0.010731,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
6,61149.763453,0.121183,0.010528,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
7,61149.775451,0.13355,0.012368,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
8,61149.786579,0.144071,0.010521,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,
9,61149.796816,0.154951,0.01088,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,


In [16]:
# Count the number of rows in the updated ActionInfo dataframe where 'PossibleLetters' is not None
count_possible_letters_not_none = action_info['PossibleLetters'].notna().sum()

count_possible_letters_not_none


8762

In [17]:
# Finding locations in the new ActionInfo where the 'conditionType' is 'Sweyepe'
sweyepe_events_all = action_info[action_info['conditionType'] == 'Sweyepe']

# Displaying the first few rows of Sweyepe events to understand their context and timestamps
sweyepe_events_all_info = {
    'Number of Sweyepe Events': len(sweyepe_events_all),
    'First Few Sweyepe Events': sweyepe_events_all.head()
}

sweyepe_events_all_info

{'Number of Sweyepe Events': 15419,
 'First Few Sweyepe Events':       absoluteTime  trialTime  deltaTime  trialIndex eventType keyboardValue  \
 6271  61223.276647   73.63022   0.010034           4       NaN           NaN   
 6272  61223.298073   73.65430   0.024075           4       NaN           NaN   
 6273  61223.314175   73.67146   0.017171           4       NaN           NaN   
 6274  61223.325736   73.68272   0.011258           4       NaN           NaN   
 6275  61223.335898   73.69366   0.010939           4       NaN           NaN   
 
       xKeyHitLocal  yKeyHitLocal candidate1 candidate2 candidate3  \
 6271          -inf          -inf        NaN        NaN        NaN   
 6272          -inf          -inf        NaN        NaN        NaN   
 6273          -inf          -inf        NaN        NaN        NaN   
 6274          -inf          -inf        NaN        NaN        NaN   
 6275          -inf          -inf        NaN        NaN        NaN   
 
      conditionType curren

In [18]:
# Finding locations in the ActionInfo where the 'conditionType' is 'Sweyepe' and 'PossibleLetters' is not None
sweyepe_events_with_letters = action_info[
    (action_info['conditionType'] == 'Sweyepe') &
    (action_info['PossibleLetters'].notna())
]

# Displaying the number of Sweyepe events with possible letters and a few example rows
sweyepe_events_with_letters_info = {
    'Number of Sweyepe Events with Possible Letters': len(sweyepe_events_with_letters),
    'First Few Sweyepe Events with Possible Letters': sweyepe_events_with_letters.head()
}

sweyepe_events_with_letters_info


{'Number of Sweyepe Events with Possible Letters': 8762,
 'First Few Sweyepe Events with Possible Letters':       absoluteTime  trialTime  deltaTime  trialIndex eventType keyboardValue  \
 6332  61224.126130   74.48336   0.011080           4       NaN           NaN   
 6333  61224.136355   74.49357   0.010205           4       NaN           NaN   
 6334  61224.147709   74.50446   0.010888           4       NaN           NaN   
 6335  61224.158857   74.51650   0.012045           4       NaN           NaN   
 6336  61224.182577   74.53992   0.023418           4       NaN           NaN   
 
       xKeyHitLocal  yKeyHitLocal candidate1 candidate2 candidate3  \
 6332          -inf          -inf        NaN        NaN        NaN   
 6333          -inf          -inf        NaN        NaN        NaN   
 6334          -inf          -inf        NaN        NaN        NaN   
 6335          -inf          -inf        NaN        NaN        NaN   
 6336          -inf          -inf        NaN        NaN

In [19]:
import pandas as pd
import numpy as np

def create_design_matrix(action_info):
    df = action_info.copy()
    df['DesignMatrixCondition'] = 0

    current_word_index = 0
    current_letter_index = 0
    trailing_letter = None

    def get_word_status(target_text, current_text):
        if pd.isna(current_text):
            return current_word_index
        current_words = current_text.lower().split()
        return len(current_words) if current_words else 0

    for idx, row in df.iterrows():
        if pd.isna(row['targetText']) or not isinstance(row['PossibleLetters'], list):
            continue

        # Check if we should move to next word based on currentText
        completed_words = get_word_status(row['targetText'], row['currentText'])
        if completed_words > current_word_index:
            current_word_index = completed_words
            current_letter_index = 0
            trailing_letter = None

        # Get current target word
        target_words = row['targetText'].lower().split()
        if current_word_index >= len(target_words):
            continue

        target_word = target_words[current_word_index]
        if current_letter_index >= len(target_word):
            continue

        # Get current and trailing target letters
        current_target = target_word[current_letter_index]
        possible_letters = set(letter.lower() for letter in row['PossibleLetters'])

        # Check for target matches
        if current_target in possible_letters:
            df.at[idx, 'DesignMatrixCondition'] = 2
            trailing_letter = current_target
            current_letter_index += 1
        elif trailing_letter and trailing_letter in possible_letters:
            df.at[idx, 'DesignMatrixCondition'] = 2
        elif possible_letters:
            df.at[idx, 'DesignMatrixCondition'] = 1

    return df

# Example usage:
result_df = create_design_matrix(action_info)
result_df.head(5)

Unnamed: 0,absoluteTime,trialTime,deltaTime,trialIndex,eventType,keyboardValue,xKeyHitLocal,yKeyHitLocal,candidate1,candidate2,candidate3,conditionType,currentText,targetText,eyeTrackingStatus,PossibleLetters,DesignMatrixCondition
0,61147.254532,0.0,0.02,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,,0
1,61149.665579,0.02,0.02,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,,0
2,61149.721329,0.078627,0.058627,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,,0
3,61149.731074,0.088935,0.010307,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,,0
4,61149.742078,0.099923,0.010989,0,,,-inf,-inf,,,,InitState,,The other patrons were taxi drivers and art st...,Available,,0


In [20]:
# Count the number of rows in the ActionInfo dataframe where 'DesignMatrixCondition' is equal to 2 (Correct Letter)
count_correct_letter = (result_df['DesignMatrixCondition'] == 2).sum()

count_correct_letter

894

In [21]:
# Filter the dataframe to include only rows where 'DesignMatrixCondition' is equal to 2 (Correct Letter)
correct_letter_rows = result_df

# Save the filtered dataframe to a new CSV file
output_file_path_correct = 'updated_action_info_with_design_matrix.csv'
correct_letter_rows.to_csv(output_file_path_correct, index=False)

# Provide the path to the user for download
output_file_path_correct


'updated_action_info_with_design_matrix.csv'

In [22]:
data = pd.read_csv('updated_action_info_with_design_matrix.csv')

In [23]:
data.columns

Index(['absoluteTime', 'trialTime', 'deltaTime', 'trialIndex', 'eventType',
       'keyboardValue', 'xKeyHitLocal', 'yKeyHitLocal', 'candidate1',
       'candidate2', 'candidate3', 'conditionType', 'currentText',
       'targetText', 'eyeTrackingStatus', 'PossibleLetters',
       'DesignMatrixCondition'],
      dtype='object')

In [19]:
'updated_action_info_with_design_matrix.csv'

'updated_action_info_with_design_matrix.csv'