In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import glob
from sklearn.model_selection import train_test_split
import os

Get the input and label files from CSVs

In [5]:
def get_train_test_splitted_data(label_files, input_files, global_mean, global_std, test_size=0.2, random_state=42):
    # Initialize lists to hold all sequences
    all_x_sequences = []
    all_y_sequences = []

    # Process each pair of input and label files
    for input_file, label_file in zip(input_files, label_files):
        # Load data
        input_df = pd.read_csv(input_file)
        label_df = pd.read_csv(label_file)

        # Prepare features and labels
        features = (input_df.values - global_mean) / global_std
        
        if type(label_df.iat[2,0]) == int:
            labels = label_df.values / 100
        else:
            labels = pd.get_dummies(label_df, drop_first=False)
        # Ensure alignment of frames
        if features.shape[0] != labels.shape[0]:
            print(f"Mismatch in frames: {input_file}, {label_file}")
            continue

        # Sample sequences
        x_sequences, y_sequences = create_sequences(features, labels, SEQUENCE_LENGTH, STRIDE)

        # Append to global lists
        all_x_sequences.append(x_sequences)
        all_y_sequences.append(y_sequences)

    # Concatenate all sequences from all files
    all_x_sequences = np.concatenate(all_x_sequences, axis=0)
    all_y_sequences = np.concatenate(all_y_sequences, axis=0)

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        all_x_sequences, all_y_sequences, test_size=test_size, random_state=random_state
    )

    # Convert to TensorFlow datasets
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

    # Shuffle, batch, and prefetch
    train_dataset = train_dataset.shuffle(buffer_size=10000).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
    test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

    return train_dataset, test_dataset

In [6]:
# Constants
SEQUENCE_LENGTH = 10
STRIDE = 5
BATCH_SIZE = 32
INPUT_DIM = 515  # Number of features per frame (e.g., biosignals + embeddings)
OUTPUT_DIM = 6 

In [4]:
# Helper function to create random sequences
def create_sequences(features, labels, sequence_length, stride):
    x_sequences, y_sequences = [], []
    for i in range(0, len(features) - sequence_length + 1, stride):
        x_sequences.append(features[i:i + sequence_length])
        y_sequences.append(labels[i:i + sequence_length])
    return np.array(x_sequences), np.array(y_sequences)

# Initialize lists to hold all sequences
all_x_sequences = []
all_y_sequences = []
all_features = []

sources = ["GUT", "ITU-YU", "MAAP"]
base_path = "//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/"
input_files, label_files = [], []

for source in sources:
    input_files.extend(glob.glob(os.path.join(base_path, source, '*_input.csv')))
    label_files.extend(glob.glob(os.path.join(base_path, source, '*_BORIS.csv')))

input_files.sort()
label_files.sort()


for input_file in input_files:
    input_df = pd.read_csv(input_file)
    all_features.append(input_df.values)

# Concatenate all features from all files to compute global mean and std
all_features = np.concatenate(all_features, axis=0)
global_mean = all_features.mean(axis=0)
global_std = all_features.std(axis=0)

# Ensure no division by zero
global_std[global_std == 0] = 1

GUT_path_input = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/GUT/*_input.csv'))
ITU_YU_path_input = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/ITU-YU/*_input.csv'))
MAAP_path_input = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/MAAP/*_input.csv'))

GUT_path_label_method_I = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/GUT/*_BORIS_method_I.csv'))
ITU_YU_path_label_method_I = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/ITU-YU/*_BORIS_method_I.csv'))
MAAP_path_label_method_I = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/MAAP/*_BORIS_method_I.csv'))

GUT_path_label_method_II = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/GUT/*_BORIS_method_II.csv'))
ITU_YU_path_label_method_II = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/ITU-YU/*_BORIS_method_II.csv'))
MAAP_path_label_method_II = sorted(glob.glob('//153.19.52.107/emboa/IO3-sessions/NEW STRUCTURE/de-earlyfusionthesis/MAAP/*_BORIS_method_II.csv'))

GUT_train_method_I, GUT_test_method_I = get_train_test_splitted_data(GUT_path_label_method_I, GUT_path_input, global_mean, global_std, test_size=0.3)
ITU_YU_train_method_I, ITU_YU_test_method_I = get_train_test_splitted_data(ITU_YU_path_label_method_I, ITU_YU_path_input, global_mean, global_std, test_size=0.3)
MAAP_train_method_I, MAAP_test_method_I = get_train_test_splitted_data(MAAP_path_label_method_I, MAAP_path_input, global_mean, global_std, test_size=0.3)

GUT_train_method_II, GUT_test_method_II = get_train_test_splitted_data(GUT_path_label_method_II, GUT_path_input, global_mean, global_std, test_size=0.3)
ITU_YU_train_method_II, ITU_YU_test_method_II = get_train_test_splitted_data(ITU_YU_path_label_method_II, ITU_YU_path_input, global_mean, global_std, test_size=0.3)
MAAP_train_method_II, MAAP_test_method_II = get_train_test_splitted_data(MAAP_path_label_method_II, MAAP_path_input, global_mean, global_std, test_size=0.3)

dataset_method_I = GUT_train_method_I.concatenate(ITU_YU_train_method_I).concatenate(MAAP_train_method_I)
dataset_method_II = GUT_train_method_II.concatenate(ITU_YU_train_method_II).concatenate(MAAP_train_method_II)

TypeError: unsupported operand type(s) for /: 'str' and 'int'

In [49]:
path = r'Z:\IO3-sessions\NEW STRUCTURE\de-earlyfusionthesis\Datasets'

tf.data.experimental.save(dataset_method_I ,os.path.join(path, 'train_dataset_method_I'))
tf.data.experimental.save(GUT_train_method_I, os.path.join(path, 'GUT_train_method_I'))
tf.data.experimental.save(GUT_test_method_I, os.path.join(path, 'GUT_test_method_I'))
tf.data.experimental.save(ITU_YU_train_method_I, os.path.join(path, 'ITU_YU_train_method_I'))
tf.data.experimental.save(ITU_YU_test_method_I, os.path.join(path, 'ITU_YU_test_method_I'))
tf.data.experimental.save(MAAP_train_method_I, os.path.join(path, 'MAAP_train_method_I'))
tf.data.experimental.save(MAAP_test_method_I, os.path.join(path, 'MAAP_test_method_I'))

tf.data.experimental.save(dataset_method_II ,os.path.join(path, 'train_dataset_method_II'))
tf.data.experimental.save(GUT_train_method_II, os.path.join(path, 'GUT_train_method_II'))
tf.data.experimental.save(GUT_test_method_II, os.path.join(path, 'GUT_test_method_II'))
tf.data.experimental.save(ITU_YU_train_method_II, os.path.join(path, 'ITU_YU_train_method_II'))
tf.data.experimental.save(ITU_YU_test_method_II, os.path.join(path, 'ITU_YU_test_method_II'))
tf.data.experimental.save(MAAP_train_method_II, os.path.join(path, 'MAAP_train_method_II'))
tf.data.experimental.save(MAAP_test_method_II, os.path.join(path, 'MAAP_test_method_II'))

In [19]:
for x_batch, y_batch in dataset.take(30):
    print(f'Feature batch shape: {x_batch.shape}')
    print(f'Label batch shape: {y_batch.shape}')

Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature batch shape: (32, 10, 515)
Label batch shape: (32, 10, 6)
Feature ba

## MODEL I

In [None]:
model_I = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(SEQUENCE_LENGTH, INPUT_DIM)),    # Input shape: (sequence_length, features)
    tf.keras.layers.LSTM(64, return_sequences=True),              # LSTM layer to capture temporal patterns
    tf.keras.layers.Dense(32, activation='relu'),                 # Dense layer to reduce dimensionality
    tf.keras.layers.Dense(OUTPUT_DIM+2, activation='softmax')       # Output layer with sigmoid for continuous values between 0 and 1
])

In [None]:
model_I.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
history = model_I.fit(dataset, epochs=50)

In [None]:
model_I.summary()

In [None]:
# Evaluate the model
loss, acc = model_I.evaluate(MAAP_test, verbose=2)
print("Untrained model, accuracy: {:5.2f}%".format(acc))

In [None]:
#model_I.save(os.path.join(path, 'saved_model/model.keras'))

## MODEL II

In [52]:
model_II = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(SEQUENCE_LENGTH, INPUT_DIM)),    # Input shape: (sequence_length, features)
    tf.keras.layers.LSTM(64, return_sequences=True),              # LSTM layer to capture temporal patterns
    tf.keras.layers.Dense(32, activation='relu'),                 # Dense layer to reduce dimensionality
    tf.keras.layers.Dense(OUTPUT_DIM, activation='sigmoid')       # Output layer with sigmoid for continuous values between 0 and 1
])

In [53]:
model_II.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [54]:
history = model_II.fit(dataset, epochs=50)

Epoch 1/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - loss: 0.0922 - mae: 0.2263
Epoch 2/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0359 - mae: 0.0771
Epoch 3/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 0.0339 - mae: 0.0707
Epoch 4/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0324 - mae: 0.0672
Epoch 5/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0314 - mae: 0.0639
Epoch 6/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0301 - mae: 0.0625
Epoch 7/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0291 - mae: 0.0615
Epoch 8/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0284 - mae: 0.0590
Epoch 9/50
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms

In [56]:
model_II.summary()

In [72]:
# Evaluate the model
loss, acc = model_II.evaluate(MAAP_test, verbose=2)
print("Untrained model, coherence: {:5.2f}%".format(100 * (1-acc)))

72/72 - 0s - 3ms/step - loss: 0.0355 - mae: 0.0604
Untrained model, accuracy: 93.96%


In [None]:
#model_II.save(os.path.join(path, 'saved_model/model.keras'))