In [1]:
import os

import numpy as np
import pandas as pd

import Utilities

In [2]:
# Machine-specific locations of the per-subject CSV files and of the shared name lists.
OUTPUT_PATH = "/Users/Mattia/Desktop/PAPER2/COARTICULATION/"
UTILITIES_PATH = "/Users/Mattia/Desktop/PAPER2/UTILITIES/"


def _read_lines(file_name):
    """Return the lines of a text file stored in UTILITIES_PATH, without line terminators."""
    with open(UTILITIES_PATH + file_name, "r") as handle:
        return handle.read().splitlines()


# One entry per line in each file.
subjNames = _read_lines("subjNames.txt")
problemList = _read_lines("problemList.txt")

In [3]:
# Function to load and filter data based on RESTRICTED_CASE and WITHIN_CASE
def load_and_filter_data(subjName, RESTRICTED_CASE, WITHIN_CASE, classType, relMotif=None,
                         path_length_range=(40, 300), start_x_range=(-50, 50),
                         start_y_range=(-100, 150)):
    """Load one subject's resampled trajectories and keep the segments used downstream.

    Parameters
    ----------
    subjName : str
        Prefix of the subject's CSV files inside OUTPUT_PATH.
    RESTRICTED_CASE : bool
        When true, keep only the (TRIAL_INDEX, LABEL_FIX) pairs for which, in
        ``<subjName>dfSampleVis.csv``, ``EYE_FIRST_CLOSEST_NODE`` equals the string form
        of the last element returned by ``Utilities.parse_string(motiveNodes)``.
        Rows whose ``motiveNodes`` is "NOTHING" are ignored for that check.
    WITHIN_CASE : bool
        When true, keep rows whose ``motif`` has length 2; otherwise keep rows whose
        ``relMotif`` equals ``relMotif`` and whose ``CLASS`` is one of "N", "E", "W".
    classType : str
        Name of the label column; rows labelled "SingularMotif" or "S" are dropped.
    relMotif : optional
        Value matched against the ``relMotif`` column when ``WITHIN_CASE`` is false.
    path_length_range, start_x_range, start_y_range : tuple of (low, high)
        Inclusive bounds on, respectively, the summed point-to-point path length of a
        (LABEL_FIX, TRIAL_INDEX) group and the x / y value of its first sample.

    Returns
    -------
    pandas.DataFrame
        The filtered rows of ``<subjName>dfTrajectoryVisResampledV2.csv``.
    """
    restricted_keys = None
    if RESTRICTED_CASE:
        df = pd.read_csv(OUTPUT_PATH + str(subjName) + "dfSampleVis.csv", low_memory=False)
        with_motif = df[df["motiveNodes"] != "NOTHING"]
        # An explicit bool array keeps the mask well-defined even when no row qualifies
        # (a row-wise DataFrame.apply on an empty frame does not return a boolean Series).
        eye_on_last_node = np.array(
            [eye == str(Utilities.parse_string(nodes)[-1])
             for eye, nodes in zip(with_motif["EYE_FIRST_CLOSEST_NODE"], with_motif["motiveNodes"])],
            dtype=bool)
        matching_rows = with_motif[eye_on_last_node]
        # A set gives constant-time membership tests for the per-row check below.
        restricted_keys = set(matching_rows.groupby(["TRIAL_INDEX", "LABEL_FIX"]).groups.keys())

    dfSubj = pd.read_csv(OUTPUT_PATH + str(subjName) + "dfTrajectoryVisResampledV2.csv", low_memory=False)

    if restricted_keys is not None:
        in_restricted = np.array(
            [key in restricted_keys for key in zip(dfSubj["TRIAL_INDEX"], dfSubj["LABEL_FIX"])],
            dtype=bool)
        dfSubj = dfSubj[in_restricted]

    dfSubj = dfSubj[dfSubj[classType] != "SingularMotif"]

    # Select only the first segment of the motif
    dfSubj = dfSubj[dfSubj.COUNTER == 0]

    if WITHIN_CASE:
        # Built as a bool array so an empty frame is still indexed by a boolean mask.
        has_two_elements = np.array([len(motif) == 2 for motif in dfSubj["motif"]], dtype=bool)
        dfSubj = dfSubj[has_two_elements]
    else:
        dfSubj = dfSubj[dfSubj["relMotif"] == relMotif]
        dfSubj = dfSubj[dfSubj["CLASS"].isin(["N", "E", "W"])]

    def _is_valid_trajectory(group):
        # All three checks are inclusive on both ends; a NaN value fails the comparison.
        xs = group["x"].values
        ys = group["y"].values
        path_length = np.sum(np.sqrt(np.diff(xs) ** 2 + np.diff(ys) ** 2))
        return bool(
            path_length_range[0] <= path_length <= path_length_range[1]
            and start_x_range[0] <= xs[0] <= start_x_range[1]
            and start_y_range[0] <= ys[0] <= start_y_range[1]
        )

    # The per-group criteria are independent of each other, so a single pass keeps
    # exactly the groups that would survive three consecutive filters.
    dfSubj = dfSubj.groupby(['LABEL_FIX', 'TRIAL_INDEX']).filter(_is_valid_trajectory)

    dfSubj = dfSubj[dfSubj[classType] != "S"]

    return dfSubj

# Function to process each group
def process_group(group, classType):
    """Turn one trajectory group into a flat feature vector plus its labels.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single group; must provide the columns 'x', 'y', 'TRIAL_INDEX',
        'LABEL_FIX', 'fixX', 'fixY' and ``classType``.
    classType : str
        Name of the column holding the class label.

    Returns
    -------
    tuple
        ``(xy, trial_index, label_fix, eye_xy, class_label)`` where ``xy`` is every x
        value followed by every y value, ``eye_xy`` is ``[fixX, fixY]``, and all
        scalar entries are read from the first row of the group.
    """
    def first(column):
        # Per-group metadata is taken from the first row only.
        return group[column].iloc[0]

    # Feature vector: the x samples followed by the y samples, in row order.
    trajectory = np.concatenate((np.array(group['x']), np.array(group['y'])))
    trial_index = first('TRIAL_INDEX')
    label_fix = first('LABEL_FIX')
    gaze_xy = np.hstack((first('fixX'), first('fixY')))
    label = first(classType)
    return trajectory, trial_index, label_fix, gaze_xy, label

In [6]:
# Module-level accumulators. This cell only appends to casesLs; the dictionaries and
# confusion-matrix lists are left in place because other cells may use them.
volumeDict = {}
pca_coefficientsDict = {}
lda_coefficientsDict = {}
confusion_matrixDict = {}
accuracyMeanDict = {}
accuracyStdDict = {}
all_confusion_matrices = []
all_confusion_matrices_eye = []
casesLs = []

# np.save does not create missing directories: make sure the target exists up front
# instead of failing after every subject has already been processed.
os.makedirs("./OUTPUT", exist_ok=True)

# Main processing loop: one pass per (restricted, within) combination.
for RESTRICTED_CASE, WITHIN_CASE in [(True, True), (True, False), (False, True), (False, False)]:

    case = "WITHIN" if WITHIN_CASE else "BETWEEN"
    if RESTRICTED_CASE:
        case += "_RESTRICTED"

    casesLs.append(case)
    print("CASE:", case)

    # Per-subject pieces, concatenated once all subjects have been read.
    all_result_matrices = []
    all_class_labels = []
    all_trial_indices = []
    all_label_fixes = []
    all_subj_names = []
    eyePosition = []

    # The label column and the relMotif filter depend on the within/between case.
    classType = "classWithin" if WITHIN_CASE else "CLASS"
    relMotif = "N" if not WITHIN_CASE else None

    for subjName in subjNames:
        # Best effort: a subject that fails to load or process is reported and skipped.
        try:
            dfSubj = load_and_filter_data(subjName, RESTRICTED_CASE, WITHIN_CASE, classType, relMotif)
            # One tuple per (TRIAL_INDEX, LABEL_FIX, class) group, as returned by process_group.
            grouped_data = dfSubj.groupby(["TRIAL_INDEX", "LABEL_FIX", classType]).apply(process_group, classType)

            if not grouped_data.empty:
                # Unpack the tuple fields; vstack requires every group's vector to have the same length.
                result_matrices = np.vstack(grouped_data.apply(lambda x: x[0]))
                trial_indices = grouped_data.apply(lambda x: x[1]).values
                label_fixes = grouped_data.apply(lambda x: x[2]).values
                eyeXY = np.vstack(grouped_data.apply(lambda x: x[3]))
                class_labels = grouped_data.apply(lambda x: x[4]).values
                all_result_matrices.append(result_matrices)
                all_class_labels.append(class_labels)
                all_trial_indices.append(trial_indices)
                all_label_fixes.append(label_fixes)
                # Repeat the subject name once per group so all saved arrays stay row-aligned.
                all_subj_names.append([subjName] * len(label_fixes))
                eyePosition.append(eyeXY)
        except Exception as e:
            print(f"Error processing subject {subjName}: {e}")
            continue

    if not all_result_matrices:
        print(f"No data for case {case}")
        continue

    final_result_matrices = np.vstack(all_result_matrices)
    final_class_labels = np.concatenate(all_class_labels)
    final_trial_indices = np.concatenate(all_trial_indices)
    final_label_fixes = np.concatenate(all_label_fixes)
    final_subj_names = np.concatenate(all_subj_names)
    final_eyePosition = np.vstack(eyePosition)

    # Save the data: one .npy file per array, named <case>_<array name>.npy
    arrays_to_save = {
        "final_result_matrices": final_result_matrices,
        "final_class_labels": final_class_labels,
        "final_trial_indices": final_trial_indices,
        "final_label_fixes": final_label_fixes,
        "final_subj_names": final_subj_names,
        "final_eyePosition": final_eyePosition,
    }
    for array_name, array_values in arrays_to_save.items():
        np.save(f"./OUTPUT/{case}_{array_name}.npy", array_values)

CASE: WITHIN_RESTRICTED
CASE: BETWEEN_RESTRICTED
CASE: WITHIN
CASE: BETWEEN


In [None]:
#Count the number of each class
from collections import Counter

In [8]:
# Print the class balance and sample count of the labels saved for each case.
for case in ("WITHIN_RESTRICTED", "BETWEEN_RESTRICTED", "WITHIN", "BETWEEN"):
    print(case)
    # Load <case>_final_class_labels.npy from the OUTPUT folder
    labels_path = f"./OUTPUT/{case}_final_class_labels.npy"
    final_class_labels = np.load(labels_path, allow_pickle=True)
    class_count = Counter(final_class_labels)
    print("Class count:", class_count)
    print("Total number of samples:", len(final_class_labels))
    print()


WITHIN_RESTRICTED
Class count: Counter({'N': 934, 'W': 229, 'E': 196})
Total number of samples: 1359

BETWEEN_RESTRICTED
Class count: Counter({'W': 1489, 'E': 1438, 'N': 1136})
Total number of samples: 4063

WITHIN
Class count: Counter({'N': 2224, 'W': 443, 'E': 377})
Total number of samples: 3044

BETWEEN
Class count: Counter({'W': 3375, 'E': 3149, 'N': 2775})
Total number of samples: 9299

