In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import re

In [2]:
# Load the per-participant score table and keep only the id and total-score columns.
labels = pd.read_csv("empathy_scores.csv", encoding="ISO-8859-1")
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the raw table (avoids pandas' SettingWithCopyWarning).
labels = labels[['Participant nr', 'Total Score original']].copy()

# Threshold for the binary label: the mean total score over all participants.
avg_score = labels['Total Score original'].mean()

# Binarise: 1 when strictly above the mean, 0 otherwise (a NaN score compares
# False and therefore maps to 0, matching the previous row-wise lambda).
labels['Total Score original'] = (labels['Total Score original'] > avg_score).astype("int64")

labels = labels.rename(columns={"Participant nr":"Participant", "Total Score original":"Label"})

In [9]:
dataframes = []
data_labels = []
max_length = 10000
data_path = 'data/grey_blue'
filename_pattern = re.compile(r"grey_blue_participant_(\d+)_trial_(\d+)")


# Build two parallel lists: one DataFrame per trial file and that trial's label row.
# os.listdir returns entries in arbitrary order, so sort them to get a reproducible
# ordering (downstream cells split the stacked tensor by position).
for filename in sorted(os.listdir(data_path)):
    match = filename_pattern.match(filename)
    if not match:
        # Not a trial file (e.g. a hidden file or sub-directory): do not try to read it.
        continue

    participant_id = int(match.group(1))
    matched_rows = labels.loc[labels['Participant'] == participant_id]
    if len(matched_rows) == 0:
        # Without a label the trial cannot be used; skipping it keeps
        # `dataframes` and `data_labels` aligned index-for-index.
        print(f"Warning: no matching row found for participant {participant_id}")
        continue

    # First matching row as a plain array: [Participant, Label].
    label = matched_rows.iloc[0].values
    df = pd.read_csv(os.path.join(data_path, filename))

    data_labels.append(label)
    dataframes.append(df)

# Every loaded trial must have exactly one label row.
assert len(dataframes) == len(data_labels), "trials and labels are misaligned"

data_labels = pd.DataFrame(data_labels)

In [14]:
# Persist the per-trial label rows without the DataFrame index column.
data_labels.to_csv(path_or_buf="data_labels.csv", index=False)

In [15]:
# Count the loaded trials; the feature count is the column count of the
# first DataFrame (0 when nothing was loaded).
num_files = len(dataframes)
if num_files > 0:
    num_features = dataframes[0].shape[1]
else:
    num_features = 0

print(num_files)
print(num_features)

86
40


In [16]:
# Bring every trial to exactly `max_length` rows: short trials are padded at the
# end with `fill_value`, long trials are cut off after `max_length` rows.
fill_value = 0
cleaned_arrays = []
for frame in dataframes:
    values = frame.to_numpy()  # shape: (time, features)
    n_steps, n_features = values.shape
    if n_steps < max_length:
        # Pre-fill a full-length array, then copy the real rows into the top.
        fitted = np.full((max_length, n_features), fill_value, dtype=values.dtype)
        fitted[:n_steps, :] = values
    else:
        fitted = values[:max_length, :]
    cleaned_arrays.append(fitted)


In [18]:
# Stack the equal-length trial arrays along a new leading axis
# (np.stack's default axis is 0).
data_tensor = np.stack(cleaned_arrays)
print(data_tensor.shape)

(86, 10000, 40)


In [20]:
# Training split: the first 43 entries along the leading axis of the stacked tensor.
data_train = data_tensor[:43]
data_train.shape

(43, 10000, 40)

In [21]:
# Validation split: entries 43 through 65 along the leading axis.
data_val = data_tensor[43:66]

In [22]:
# Test split: everything from entry 66 onwards along the leading axis.
data_test = data_tensor[66:]

In [24]:
# np.save does not create missing directories, so make sure the target exists first.
os.makedirs("data/tensors", exist_ok=True)
np.save("data/tensors/data_train.npy", data_train)

In [25]:
# np.save does not create missing directories, so make sure the target exists first.
os.makedirs("data/tensors", exist_ok=True)
np.save("data/tensors/data_val.npy", data_val)
np.save("data/tensors/data_test.npy", data_test)