The data extraction was based on the provided code, with slight adjustments for where we decided to store the downloaded data:

In [1]:
from typing import Iterator

import numpy as np
import h5py
import os

DATA_PATH = os.path.abspath("../extracted_zip_in_here/Final Project data/")
INTRA_TRAIN_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Intra/train/"))
INTRA_VAL_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Intra/val/"))
INTRA_TEST_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Intra/test/"))

def get_dataset_name(filename_with_dir):
    filename_without_dir = os.path.basename(filename_with_dir)
    temp = filename_without_dir.split('.')[:-1]
    dataset_name = ''.join(temp)
    temp = dataset_name.split('_')[:-1]
    dataset_name = "_".join(temp)
    return dataset_name


def extract_data_from_folder_by_file(folder_path, shuffle=False):
    files = os.listdir(folder_path)
    if shuffle:
        np.random.shuffle(files)

    for file_name in files:
        
        filename_path = os.path.join(folder_path, file_name)
        
        with h5py.File(filename_path, 'r') as f:
            dataset_name = get_dataset_name(filename_path)
            matrix = f.get(dataset_name)[()]
            yield dataset_name, matrix

We first have to scale the data across different files in the same way, so we have to scan the files and find min max to perform the scaling

In [2]:
def learn_minmax_from_all_files(folder_path: str) -> tuple:
    # Placeholders
    min_val = None
    max_val = None

    for (_, data) in extract_data_from_folder_by_file(folder_path):
        data = data.T
        if min_val is None:
            min_val = np.min(data, axis=0)
            max_val = np.max(data, axis=0)
        else:
            # Update min_val and max_val
            min_val = np.minimum(min_val, np.min(data, axis=0))
            max_val = np.maximum(max_val, np.max(data, axis=0))
        
    return min_val, max_val

def scale_data(data: np.ndarray, min_val: np.ndarray, max_val: np.ndarray) -> np.ndarray:
    # Scale the data to the range [0, 1]
    return (data - min_val) / (max_val - min_val)

In [3]:
min_val, max_val = learn_minmax_from_all_files(INTRA_TRAIN_FOLDER)
print(f"Min values: {min_val.shape}, Max values: {max_val.shape}")

Min values: (248,), Max values: (248,)


Because of independent sampling, we can just sample each file independently and the same dropout should occur:

In [4]:
def downsample(data: np.array, factor: float) -> np.array:
    """
    Downsample time series data by uniformly selecting samples at fixed intervals
    to keep the temporal order intact.

    Args:
        data (np.array): Input time series data (1D or 2D with time dimension as first axis)
        factor (float): Downsampling factor (e.g., 0.5 means keep half the samples)

    Returns:
        np.array: Downsampled data with timesteps reduced by the factor
    """
    num_samples = int(len(data) * factor)
    # Calculate the stride to evenly pick samples
    stride = len(data) / num_samples
    # Use np.floor to avoid going out of bounds and convert to int indices
    indices = (np.floor(np.arange(num_samples) * stride)).astype(int)
    downsampled_data = data[indices]
    return downsampled_data

Here we can set the downsample factor for all sampling

In [5]:
DOWNSAMPLE_FACTOR = 0.2

Here, we define the preprocessing steps that we apply to all data after reading it from the file:

In [6]:
intra_preprocessing_pipeline = [
    lambda x: scale_data(x, min_val, max_val), 
    lambda x: downsample(x, DOWNSAMPLE_FACTOR)
]

We should also create labels based on the file names:

In [7]:
def generate_label(file_name:str) -> np.ndarray:
    # Return a one-hot encoded label based on the file name, there are4 classes
    # 0: rest
    if "rest_" in file_name:
        return np.array([1, 0, 0, 0])
    # 1: task_motor
    elif "task_motor_" in file_name:
        return np.array([0, 1, 0, 0])
    # 2: task_story_math
    elif "task_story_math_" in file_name:
        return np.array([0, 0, 1, 0])
    # 3: task_working_memory
    elif "task_working_memory_" in file_name:
        return np.array([0, 0, 0, 1])
    else:
        raise ValueError(f"Unknown file name: {file_name}")

To create batches by number of files, we can use a generator like this:

In [8]:
def create_batches(folder, number_of_files_per_batch: int, preprocessing_pipeline: list = None, shuffle_files=True) -> Iterator[tuple]:
    batch_data = []
    batch_labels = []
    for n, (name, data) in enumerate(extract_data_from_folder_by_file(folder, shuffle=shuffle_files)):
        data = data.T
        if preprocessing_pipeline:
            for preprocessing_step in preprocessing_pipeline:
                data = preprocessing_step(data)
        
        if data.shape[0] != 7124:
            raise ValueError(f"data shaped{data.shape}")

        # Add the preprocessed data to the batch
        batch_data.append(data)

        # Generate the label matrix of the length of the data for the current file
        label_vector = generate_label(name)
        batch_labels.append(label_vector)


        # Check if we have reached the desired batch size
        if (n + 1) % number_of_files_per_batch == 0:
            # Stack along the first axis (like a batch dimension)
            yield (batch_data, batch_labels)
            batch_data = []
            batch_labels = []

In [9]:
import gc

def keras_data_generator(folder, batch_size, preprocessing_pipeline=None, shuffle_files=True):
    while True:  # Required for Keras fit() to work
        gc.collect()
        for batch_X_list, batch_y_list in create_batches(
            folder=folder,
            number_of_files_per_batch=batch_size,
            preprocessing_pipeline=preprocessing_pipeline,
            shuffle_files=shuffle_files
        ):
            data = np.array(batch_X_list)
            labels = np.array(batch_y_list)

            # Shuffle data and labels together
            indices = np.arange(len(data))
            np.random.shuffle(indices)

            if data.shape[0] is None:
                continue

            yield data[indices], labels[indices]

## Training

### Model definition

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    LSTM,
    Dense,
    Attention,
    LayerNormalization,
    Input,
    Layer,
    Conv1D,
    Dropout,
    Bidirectional,
    BatchNormalization,
    MaxPooling1D,
    Flatten,
    AveragePooling1D,
    MultiHeadAttention,
    GlobalAveragePooling1D,
)
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam

FEATURES = 248
TIMESTEPS = 7124
CLASSES = 4

lstm_classifier = Sequential(
    [
        Input((TIMESTEPS, FEATURES)),
        LSTM(64, return_sequences=False),
        Dense(64, activation="relu"),
        Dropout(0.2),
        Dense(CLASSES, activation="softmax"),
    ]
)

lstm_classifier.compile(
    loss=CategoricalCrossentropy(),  # works directly with one-hot encoded labels
    optimizer=Adam(),
    metrics=["accuracy"],
)

bidirectional_lstm = Sequential(
    [
        Input((TIMESTEPS, FEATURES)),
        Bidirectional(LSTM(64, return_sequences=False)),
        Dense(64, activation="relu"),
        Dropout(0.2),
        Dense(CLASSES, activation="softmax"),
    ]
)

bidirectional_lstm.compile(
    loss=CategoricalCrossentropy(), optimizer=Adam(), metrics=["accuracy"]
)

cnn = Sequential(
    [
        Input((TIMESTEPS, FEATURES)),
        Conv1D(filters=64, kernel_size=5, activation="relu"),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=5, activation="relu"),
        MaxPooling1D(pool_size=2),
        GlobalAveragePooling1D(),
        Dense(16, activation="relu"),
        Dense(64, activation="relu"),
        Dropout(0.08),
        Dense(CLASSES, activation="softmax"),
    ]
)

cnn.compile(
    loss=CategoricalCrossentropy(),
    optimizer=Adam(learning_rate=0.0005),
    metrics=["accuracy"],
)


class AttentionLayer(Layer):
    def __init__(self):
        super(AttentionLayer, self).__init__()
        self.attention = Attention()

    def call(self, inputs):
        # Use same input as query and value for self-attention
        return self.attention([inputs, inputs])


cnn_self_attention = Sequential(
    [
        Input((TIMESTEPS, FEATURES)),
        Conv1D(filters=64, kernel_size=5, activation="relu"),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=5, activation="relu"),
        MaxPooling1D(pool_size=2),
        GlobalAveragePooling1D(),
        LayerNormalization(),  # <--- try here
        AttentionLayer(),
        Dense(16, activation="relu"),
        Dense(64, activation="relu"),
        Dropout(0.08),
        Dense(CLASSES, activation="softmax"),
    ]
)

cnn_self_attention.compile(
    loss=CategoricalCrossentropy(),
    optimizer=Adam(learning_rate=0.0005),
    metrics=["accuracy"],
)

from tensorflow.keras.layers import MultiHeadAttention

class MHAttentionLayer(Layer):
    def __init__(self, num_heads=4, key_dim=32):
        super().__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)
        self.norm = LayerNormalization()  # <-- this was missing

    def call(self, inputs):
        attn_out = self.att(inputs, inputs)
        return self.norm(attn_out + inputs)  # optional residual connection
    
cnn_multihead_attention = Sequential(
    [
        Input((TIMESTEPS, FEATURES)),
        Conv1D(filters=64, kernel_size=5, activation="relu"),
        MaxPooling1D(pool_size=2),
        Conv1D(filters=128, kernel_size=5, activation="relu"),
        MaxPooling1D(pool_size=2),

        
        LayerNormalization(),  # <--- try here
        MHAttentionLayer(num_heads=4, key_dim=32),

        GlobalAveragePooling1D(),

        Dense(16, activation="relu"),
        Dense(64, activation="relu"),
        Dropout(0.08),
        Dense(CLASSES, activation="softmax"),
    ]
)

cnn_multihead_attention.compile(
    loss=CategoricalCrossentropy(),
    optimizer=Adam(learning_rate=0.0005),
    metrics=["accuracy"],
)


In [11]:
MODELS = {
    "LSTM": lstm_classifier,
    "Bidirectional LSTM": bidirectional_lstm,
    "CNN": cnn,
    "CNN + self attention": cnn_self_attention,
    "CNN + multihead attention": cnn_multihead_attention
}

## Training

In [12]:
EPOCHS = 1000

In [13]:
from tensorflow.keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(
    monitor='val_loss',   # or 'val_accuracy', depending on what you want to track
    patience=10,           # Wait 5 epochs without improvement before stopping
    restore_best_weights=True,  # Roll back to best weights (optional, but recommended)
    verbose=1
)

In [14]:
intra_training_progress = dict()

for name, model in MODELS.items():

    model.compile(
        loss=CategoricalCrossentropy(),
        optimizer=Adam(learning_rate=0.0005),
        metrics=['accuracy']
    )

    intra_training_progress[name] = model.fit(
        keras_data_generator(INTRA_TRAIN_FOLDER, 4, intra_preprocessing_pipeline),
        steps_per_epoch=7,
        epochs=EPOCHS,
        verbose=1,
        validation_data=keras_data_generator(INTRA_VAL_FOLDER, 1, intra_preprocessing_pipeline),
        validation_steps=4,
        callbacks=[early_stopping],
    )

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [15]:
import plotly.graph_objects as go
fig = go.Figure()
for name, training in intra_training_progress.items():
    fig.add_trace(go.Scatter(x=np.arange(EPOCHS), y=training.history["loss"], mode='lines', name=f"{name} Training Loss"))


# Customize layout
fig.update_layout(
    title='Intra Training Loss over Epochs',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    yaxis_type='log',         # <-- this makes the y-axis logarithmic
    template='plotly_white'
)

# Show the plot
fig.show()

## Testing

In [16]:
# For storing intra and cross results
results = []

In [17]:


for name, model in MODELS.items():
    loss, accuracy = model.evaluate(
        keras_data_generator(
            INTRA_TEST_FOLDER,
            batch_size=2,
            preprocessing_pipeline=intra_preprocessing_pipeline,
        ),
        steps=4,
        verbose=1,
    )

    results.append({"Model": name, "Task": "Intra", "Loss": loss, "Accuracy": accuracy})

    print(f"Model: {name}, Loss: {loss}, Accuracy: {accuracy}")

Model: LSTM, Loss: 0.0003487993963062763, Accuracy: 1.0
Model: Bidirectional LSTM, Loss: 1.1950623957091011e-05, Accuracy: 1.0
Model: CNN, Loss: 0.0014628693461418152, Accuracy: 1.0
Model: CNN + self attention, Loss: 0.001731989672407508, Accuracy: 1.0
Model: CNN + multihead attention, Loss: 0.0008366784313693643, Accuracy: 1.0


# Cross

In [18]:
CROSS_TRAIN_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Cross/train/"))
CROSS_VAL_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Cross/val/"))
CROSS_TEST_1_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Cross/test1/"))
CROSS_TEST_2_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Cross/test2/"))
CROSS_TEST_3_FOLDER = os.path.join(DATA_PATH, os.path.relpath("./Cross/test3/"))

We should also scale the features, now based on data from multiple subjects:

In [None]:
min_val, max_val = learn_minmax_from_all_files(CROSS_TRAIN_FOLDER)

Min values: (248,), Max values: (248,)


And define a preprocessing pipeline, it is the same one as before. The order only matters for computation cost, but that is slightly irrelevant for this step.

In [20]:
cross_preprocessing_pipeline = [
    lambda x: scale_data(x, min_val, max_val), 
    lambda x: downsample(x, DOWNSAMPLE_FACTOR)
]

## Cross - training loop

Using the same loop as before, we train the models. We have a slightly lower initial learning rate (0.00005) compared to intra, because we noticed that this helps with convergence, probably due to a harder landscape created by more variety in the training data.

In [21]:
cross_training_progress = dict()

for name, model in MODELS.items():

    model.compile(
        loss=CategoricalCrossentropy(),
        optimizer=Adam(learning_rate=0.00005),
        metrics=['accuracy']
    )

    cross_training_progress[name] = model.fit(
        keras_data_generator(CROSS_TRAIN_FOLDER, 7, cross_preprocessing_pipeline),
        steps_per_epoch=8,
        epochs=EPOCHS,
        verbose=1,
        validation_data=keras_data_generator(CROSS_VAL_FOLDER, 8, cross_preprocessing_pipeline),
        validation_steps=1,
        callbacks=[early_stopping],
    )

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 39: early stopping
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000

## Cross - Convergence plots

Let us plot the convergence of the models:

In [26]:
fig = go.Figure()
for name, training in cross_training_progress.items():
    fig.add_trace(go.Scatter(x=np.arange(EPOCHS), y=training.history["loss"], mode='lines', name=f"{name} Training Loss"))


# Customize layout
fig.update_layout(
    title='Cross Training Loss over Epochs',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    yaxis_type='log',         # <-- this makes the y-axis logarithmic
    template='plotly_white'
)

# Show the plot
fig.show()

In [27]:
fig = go.Figure()
for name, training in cross_training_progress.items():
    fig.add_trace(go.Scatter(x=np.arange(EPOCHS), y=training.history["accuracy"], mode='lines', name=f"{name} Training Accuracy"))


# Customize layout
fig.update_layout(
    title='Cross Training Accuracy over Epochs',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    yaxis_type='log',         # <-- this makes the y-axis logarithmic
    template='plotly_white'
)

# Show the plot
fig.show()

In [22]:
# Collect the test results for each model
for name, model in MODELS.items():

    # Evaluate on the first test folder
    loss_1, accuracy_1 = model.evaluate(
        keras_data_generator(
            CROSS_TEST_1_FOLDER,
            batch_size=2,
            preprocessing_pipeline=cross_preprocessing_pipeline,
        ),
        steps=8,
        verbose=1,
    )

    results.append(
        {"Model": name, "Task": "Cross_1", "Loss": loss_1, "Accuracy": accuracy_1}
    )

    # Evaluate on the second test folder
    loss_2, accuracy_2 = model.evaluate(
        keras_data_generator(
            CROSS_TEST_2_FOLDER,
            batch_size=2,
            preprocessing_pipeline=cross_preprocessing_pipeline,
        ),
        steps=8,
        verbose=1,
    )

    results.append(
        {"Model": name, "Task": "Cross_2", "Loss": loss_2, "Accuracy": accuracy_2}
    )

    # Evaluate on the second test folder
    loss_3, accuracy_3 = model.evaluate(
        keras_data_generator(
            CROSS_TEST_3_FOLDER,
            batch_size=2,
            preprocessing_pipeline=cross_preprocessing_pipeline,
        ),
        steps=8,
        verbose=1,
    )

    results.append(
        {"Model": name, "Task": "Cross_3", "Loss": loss_3, "Accuracy": accuracy_3}
    )
print(results)

[{'Model': 'LSTM', 'Task': 'Intra', 'Loss': 0.0003487993963062763, 'Accuracy': 1.0}, {'Model': 'Bidirectional LSTM', 'Task': 'Intra', 'Loss': 1.1950623957091011e-05, 'Accuracy': 1.0}, {'Model': 'CNN', 'Task': 'Intra', 'Loss': 0.0014628693461418152, 'Accuracy': 1.0}, {'Model': 'CNN + self attention', 'Task': 'Intra', 'Loss': 0.001731989672407508, 'Accuracy': 1.0}, {'Model': 'CNN + multihead attention', 'Task': 'Intra', 'Loss': 0.0008366784313693643, 'Accuracy': 1.0}, {'Model': 'LSTM', 'Task': 'Cross_1', 'Loss': 1.193352222442627, 'Accuracy': 0.3125}, {'Model': 'LSTM', 'Task': 'Cross_2', 'Loss': 2.9333462715148926, 'Accuracy': 0.25}, {'Model': 'LSTM', 'Task': 'Cross_3', 'Loss': 1.2379070520401, 'Accuracy': 0.4375}, {'Model': 'Bidirectional LSTM', 'Task': 'Cross_1', 'Loss': 0.7183987498283386, 'Accuracy': 0.8125}, {'Model': 'Bidirectional LSTM', 'Task': 'Cross_2', 'Loss': 2.9952516555786133, 'Accuracy': 0.25}, {'Model': 'Bidirectional LSTM', 'Task': 'Cross_3', 'Loss': 1.6523751020431519, 

# Plots

In [23]:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

In [24]:
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,Model,Task,Loss,Accuracy
0,LSTM,Intra,0.000349,1.0
1,Bidirectional LSTM,Intra,1.2e-05,1.0
2,CNN,Intra,0.001463,1.0
3,CNN + self attention,Intra,0.001732,1.0
4,CNN + multihead attention,Intra,0.000837,1.0
5,LSTM,Cross_1,1.193352,0.3125
6,LSTM,Cross_2,2.933346,0.25
7,LSTM,Cross_3,1.237907,0.4375
8,Bidirectional LSTM,Cross_1,0.718399,0.8125
9,Bidirectional LSTM,Cross_2,2.995252,0.25


In [25]:
fig = go.Figure()

fig = px.bar(
    results_df,
    x="Model",
    y="Loss",
    color="Task",
    barmode="group",
    title="Test Loss"
)
fig.show()