In [1]:
from typing import Iterator

import numpy as np
import h5py
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from scipy.signal import butter, filtfilt
from sklearn.metrics import classification_report

# --- Configuration ---
DATA_PATH = os.path.abspath("../extracted_zip_in_here/Final Project data/")
INTRA_TRAIN_FOLDER = os.path.join(DATA_PATH, "Intra/train")
INTRA_TEST_FOLDER = os.path.join(DATA_PATH, "Intra/test")
DOWNSAMPLE_FACTOR = 0.7

# --- File and Data Handling ---
def get_dataset_name(filepath: str) -> str:
    """Derive the HDF5 dataset key from a recording's file name.

    The key is the part of the file name before its first dot, with the
    final underscore-separated token removed, e.g.
    ``rest_105923_1.h5`` -> ``rest_105923``.

    Raises:
        IndexError: if the file name contains no dot at all.
    """
    base = os.path.basename(filepath)
    # Only the text before the first dot is considered.
    stem = base.split('.')[:-1][0]
    # Keep everything before the last underscore ('' when there is none).
    prefix, _, _ = stem.rpartition('_')
    return prefix

def extract_data_from_folder(folder_path: str, shuffle: bool = False) -> Iterator[tuple[str, np.ndarray]]:
    """Lazily yield ``(dataset_name, array)`` for every entry of a folder.

    Each entry is opened as an HDF5 file; the dataset whose key is given by
    ``get_dataset_name`` is read fully into memory and yielded transposed.

    Args:
        folder_path: directory whose entries are all opened with h5py.
        shuffle: when true, the listing is shuffled in place with NumPy's
            global RNG before iteration.
    """
    entries = os.listdir(folder_path)
    if shuffle:
        np.random.shuffle(entries)
    for entry in entries:
        full_path = os.path.join(folder_path, entry)
        with h5py.File(full_path, 'r') as handle:
            key = get_dataset_name(entry)
            matrix = handle[key][()]
            # Transposed once here so downstream code sees the swapped axes
            # (presumably (channels, time) -> (time, channels) - TODO confirm).
            yield key, matrix.T

def learn_minmax(folder_path: str) -> tuple[np.ndarray, np.ndarray]:
    """Compute running minima and maxima along axis 0 over a whole folder.

    Returns:
        ``(min_val, max_val)``: element-wise extrema of the per-file axis-0
        minima/maxima, or ``(None, None)`` when the folder yields no files.
    """
    lowest, highest = None, None
    for _, recording in extract_data_from_folder(folder_path):
        file_low = recording.min(axis=0)
        file_high = recording.max(axis=0)
        if lowest is None:
            # First file seeds both accumulators.
            lowest, highest = file_low, file_high
        else:
            lowest = np.minimum(lowest, file_low)
            highest = np.maximum(highest, file_high)
    return lowest, highest

def generate_label(name: str) -> np.ndarray:
    """Return a one-hot label (length 4, float) for a dataset name.

    The first class whose name followed by an underscore occurs in ``name``
    wins; the class order is rest, task_motor, task_story_math,
    task_working_memory.

    Raises:
        ValueError: if no class name matches.
    """
    classes = ("rest", "task_motor", "task_story_math", "task_working_memory")
    hit = next((idx for idx, cls in enumerate(classes) if cls + "_" in name), None)
    if hit is None:
        raise ValueError(f"Unknown file name: {name}")
    one_hot = np.zeros(len(classes))
    one_hot[hit] = 1
    return one_hot

# --- Preprocessing ---
def scale_data(data: np.ndarray, min_val: np.ndarray, max_val: np.ndarray) -> np.ndarray:
    """Min-max scale ``data`` using previously learned extrema.

    Computes ``(data - min_val) / (max_val - min_val)`` with NumPy
    broadcasting. Entries where ``max_val == min_val`` are divided by 1
    instead of 0, so they map to ``data - min_val`` rather than NaN/inf
    (the same guard ``zscore_per_channel`` applies to a zero std).

    Args:
        data: array to scale.
        min_val: minima, broadcastable against ``data``.
        max_val: maxima, broadcastable against ``data``.

    Returns:
        The scaled array (float).
    """
    span = np.asarray(max_val - min_val, dtype=float)
    # A zero range would otherwise yield 0/0 -> NaN for that entry.
    span = np.where(span == 0, 1.0, span)
    return (data - min_val) / span

def downsample(data: np.ndarray, factor: float) -> np.ndarray:
    """Resample ``data`` along axis 0 to ``int(len(data) * factor)`` rows.

    Rows are picked at evenly spaced (floored) positions, so no filtering or
    interpolation is performed.

    Args:
        data: array whose first axis is resampled.
        factor: fraction of rows to keep; must be positive.

    Returns:
        The selected rows. An empty slice of ``data`` is returned when the
        target length rounds down to zero (e.g. empty input).

    Raises:
        ValueError: if ``factor`` is not positive.
    """
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor}")
    num_samples = int(len(data) * factor)
    if num_samples == 0:
        # Avoid dividing by zero below; nothing can be selected anyway.
        return data[:0]
    indices = np.floor(np.arange(num_samples) * (len(data) / num_samples)).astype(int)
    return data[indices]

def add_gaussian_noise(data: np.ndarray, stddev: float = 0.01) -> np.ndarray:
    """Return ``data`` plus zero-mean Gaussian noise of the same shape.

    The noise is drawn from NumPy's global RNG with standard deviation
    ``stddev``; the input array is not modified.
    """
    return data + np.random.normal(loc=0, scale=stddev, size=data.shape)

def time_shift(data: np.ndarray, shift_max: int = 10) -> np.ndarray:
    """Circularly shift ``data`` along axis 0 by a random offset.

    The offset is drawn uniformly from the symmetric, inclusive range
    ``[-shift_max, shift_max]`` using NumPy's global RNG. Samples rolled
    past one end re-enter at the other (``np.roll`` semantics).

    Args:
        data: array to shift along its first axis.
        shift_max: largest absolute shift; ``0`` returns the data unshifted.

    Raises:
        ValueError: if ``shift_max`` is negative.
    """
    if shift_max < 0:
        raise ValueError(f"shift_max must be non-negative, got {shift_max}")
    # randint's upper bound is exclusive, hence the +1 to include +shift_max.
    shift = np.random.randint(-shift_max, shift_max + 1)
    return np.roll(data, shift, axis=0)

def channel_dropout(data: np.ndarray, dropout_rate: float = 0.1) -> np.ndarray:
    """Zero out randomly chosen columns (axis 1) of a 2-D array.

    One uniform random number is drawn per column from NumPy's global RNG;
    columns whose draw is not greater than ``dropout_rate`` are multiplied
    by zero. Surviving columns are not rescaled.
    """
    keep = np.random.rand(data.shape[1]) > dropout_rate
    # Broadcast the per-column mask over every row.
    return data * keep[None, :]

def random_scaling(data: np.ndarray, scale_range=(0.9, 1.1)) -> np.ndarray:
    """Multiply the whole array by one random factor.

    The factor is drawn uniformly from ``scale_range`` (unpacked into
    ``np.random.uniform``) using NumPy's global RNG.
    """
    factor = np.random.uniform(*scale_range)
    return np.multiply(data, factor)

def bandpass_filter(data: np.ndarray, lowcut=0.5, highcut=30.0, fs=250.0, order=3) -> np.ndarray:
    """Apply a zero-phase Butterworth band-pass filter along axis 0.

    Args:
        data: signal array; filtering runs along its first axis.
        lowcut: lower cut-off, in the same unit as ``fs``.
        highcut: upper cut-off, in the same unit as ``fs``.
        fs: sampling frequency used to normalise the cut-offs.
        order: order passed to ``scipy.signal.butter``.

    Returns:
        The filtered array (forward-backward filtering via ``filtfilt``).
    """
    nyquist = fs / 2.0
    # butter expects cut-offs expressed as a fraction of the Nyquist frequency.
    band = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, band, btype='band')
    return filtfilt(numerator, denominator, data, axis=0)

def zscore_per_channel(data: np.ndarray) -> np.ndarray:
    """Standardise each column: subtract its mean and divide by its std.

    Statistics are taken along axis 0. Columns with zero standard deviation
    are divided by 1 instead, so they come out as all zeros.
    """
    centre = data.mean(axis=0)
    spread = data.std(axis=0)
    spread[spread == 0] = 1  # constant columns: avoid dividing by zero
    return (data - centre) / spread

def baseline_correction(data: np.ndarray, baseline_duration=500) -> np.ndarray:
    """Subtract the mean of the first ``baseline_duration`` rows.

    The mean is taken per column (axis 0). When the array has fewer rows
    than ``baseline_duration`` it is returned unchanged (same object).
    """
    if data.shape[0] >= baseline_duration:
        offset = data[:baseline_duration].mean(axis=0)
        return data - offset
    return data

# --- Batching ---
def create_batches(folder: str, batch_size: int, preprocessing: list = None, shuffle: bool = False) -> Iterator[tuple[np.ndarray, np.ndarray]]:
    """Yield ``(data, labels)`` batches built from the files of a folder.

    Every file is run through the callables in ``preprocessing`` (in order)
    and labelled with ``generate_label``. A batch is emitted after every
    ``batch_size`` files; a final, smaller batch holds any remainder.

    Args:
        folder: directory passed to ``extract_data_from_folder``.
        batch_size: number of files per full batch.
        preprocessing: optional list of ``array -> array`` callables.
        shuffle: forwarded to ``extract_data_from_folder``.
    """
    samples, labels = [], []
    seen = 0
    for name, data in extract_data_from_folder(folder, shuffle):
        for step in (preprocessing or ()):
            data = step(data)
        samples.append(data)
        labels.append(generate_label(name))
        seen += 1

        if seen % batch_size == 0:
            # np.array stacks the samples; this presumes they share a shape.
            yield np.array(samples), np.array(labels)
            samples, labels = [], []
    if samples:
        yield np.array(samples), np.array(labels)

# --- Model ---
def create_model(input_shape) -> Sequential:
    """Build and compile the baseline LSTM classifier.

    Architecture: LSTM(64, last output only) -> Dense(64, relu) ->
    Dense(4, softmax), compiled with categorical cross-entropy, Adam with
    default settings, and accuracy as metric.

    Args:
        input_shape: shape of one sample, passed to the LSTM layer.
    """
    model = Sequential()
    model.add(LSTM(64, return_sequences=False, input_shape=input_shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(4, activation='softmax'))
    model.compile(loss=CategoricalCrossentropy(), optimizer=Adam(), metrics=['accuracy'])
    return model

def train_model(model, train_folder: str, epochs: int = 10, batch_size: int = 8, preprocessing: list = None):
    """Train ``model`` by streaming batches from ``train_folder``.

    For every epoch the folder is re-read batch by batch (so any random
    preprocessing is re-drawn), the samples inside each batch are permuted,
    and ``model.fit`` is called once per batch.

    Returns:
        The same ``model`` object, after training.
    """
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        # TODO: file order is currently not shuffled (shuffle was True before).
        batches = create_batches(train_folder, batch_size, preprocessing, shuffle=False)
        for features, targets in batches:
            order = np.random.permutation(len(features))
            model.fit(features[order], targets[order], verbose=0)
    return model

def evaluate_model(model, folder: str, batch_size: int, preprocessing: list) -> tuple[float, float]:
    """Evaluate ``model`` on every file of ``folder`` and average the metrics.

    ``model.evaluate`` is called once per batch; the per-batch loss and
    accuracy are then averaged weighted by batch size, so a smaller final
    batch does not get the same weight as a full one.

    Args:
        model: compiled model whose ``evaluate`` returns ``(loss, accuracy)``.
        folder: directory streamed through ``create_batches``.
        batch_size: number of files per batch.
        preprocessing: list of callables applied to every file.

    Returns:
        ``(loss, accuracy)`` over all samples, or ``(nan, nan)`` when the
        folder yields no batches.
    """
    losses, accuracies, sizes = [], [], []
    for batch_X, batch_y in create_batches(folder, batch_size, preprocessing, shuffle=False):
        loss, acc = model.evaluate(batch_X, batch_y, verbose=0)
        losses.append(loss)
        accuracies.append(acc)
        sizes.append(len(batch_X))
    if not sizes:
        return float("nan"), float("nan")
    mean_loss = float(np.average(losses, weights=sizes))
    mean_acc = float(np.average(accuracies, weights=sizes))
    return mean_loss, mean_acc

def detailed_evaluation(model, folder, batch_size, preprocessing):
    """Print scikit-learn's classification report for a folder.

    Predictions are collected batch by batch; both the one-hot labels and
    the predicted probabilities are reduced to class indices with argmax.
    """
    truths, guesses = [], []
    for features, one_hot in create_batches(folder, batch_size, preprocessing, shuffle=False):
        probabilities = model.predict(features)
        truths += list(np.argmax(one_hot, axis=1))
        guesses += list(np.argmax(probabilities, axis=1))
    print(classification_report(truths, guesses))


2025-06-12 10:14:05.499823: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-12 10:14:05.613347: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-12 10:14:05.699469: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749716045.788594    3953 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749716045.813253    3953 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1749716045.993528    3953 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linkin

In [2]:
# --- Run Training and Evaluation ---
# Learn the scaling statistics from the intra-subject training files.
min_val, max_val = learn_minmax(INTRA_TRAIN_FOLDER)
print(f"Learned min/max shapes: {min_val.shape}, {max_val.shape}")

# Ordered preprocessing steps applied to every file. The statistics and the
# downsample factor are bound as lambda defaults so that a later reassignment
# of the globals cannot silently change what this pipeline does.
# NOTE(review): add_gaussian_noise and channel_dropout are random
# augmentations; this same list is also passed to the evaluation calls.
preprocessing_pipeline = [
    # baseline_correction,
    # bandpass_filter,
    lambda x, lo=min_val, hi=max_val: scale_data(x, lo, hi),
    lambda x, factor=DOWNSAMPLE_FACTOR: downsample(x, factor),
    add_gaussian_noise,  # augmentation, added for more general performance
    # time_shift,
    channel_dropout,
    # random_scaling
]

###########################
def load_sample(data_folder):
    """Load the first dataset of the first ``.h5`` file listed in a folder.

    "First" follows the order returned by ``os.listdir`` and ``h5py``'s key
    listing respectively.

    Raises:
        ValueError: if the folder has no ``.h5`` file or the file has no keys.
    """
    h5_names = [entry for entry in os.listdir(data_folder) if entry.endswith('.h5')]
    if len(h5_names) == 0:
        raise ValueError("No .h5 files found in the folder!")

    first_path = os.path.join(data_folder, h5_names[0])
    with h5py.File(first_path, 'r') as handle:
        keys = list(handle.keys())
        if len(keys) == 0:
            raise ValueError("No dataset found in the file!")
        # Read the data while the file is still open.
        return handle[keys[0]][:]

def determine_input_shape(data_folder, preprocessing_pipeline, min_val):
    """Return the shape one sample has after the preprocessing pipeline.

    A single sample is loaded with ``load_sample``, reduced to its first
    element when it is 3-D, transposed when its first dimension equals the
    length of ``min_val``, and then passed through every pipeline step.

    Args:
        data_folder: folder to take the sample from.
        preprocessing_pipeline: iterable of ``array -> array`` callables.
        min_val: array whose length identifies the feature axis.
    """
    probe = load_sample(data_folder)

    # Drop a leading batch-like axis if there is one.
    probe = probe[0] if probe.ndim == 3 else probe

    # A first axis as long as min_val is taken to be the feature axis, so the
    # sample is flipped to put that axis last.
    if probe.shape[0] == min_val.shape[0]:
        probe = probe.T

    for step in preprocessing_pipeline:
        probe = step(probe)

    return probe.shape
############################

# Derive the model input shape by pushing one training sample through the pipeline.
input_shape = determine_input_shape(INTRA_TRAIN_FOLDER, preprocessing_pipeline, min_val)
print(f"Determined input shape: {input_shape}")


Learned min/max shapes: (248,), (248,)
Determined input shape: (24936, 248)


In [None]:
# Build the baseline LSTM and train it on the intra-subject training folder.
model = create_model(input_shape=input_shape)
trained_model = train_model(model, INTRA_TRAIN_FOLDER, epochs=10, batch_size=8, preprocessing=preprocessing_pipeline)

# NOTE(review): both evaluations reuse preprocessing_pipeline, which contains the
# random steps add_gaussian_noise and channel_dropout - confirm that evaluating
# on augmented data is intended.
train_loss, train_acc = evaluate_model(trained_model, INTRA_TRAIN_FOLDER, batch_size=8, preprocessing=preprocessing_pipeline)
print(f"Training Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")

test_loss, test_acc = evaluate_model(trained_model, INTRA_TEST_FOLDER, batch_size=8, preprocessing=preprocessing_pipeline)
print(f"Test Loss: {test_loss:.4f}, Accuracy: {test_acc:.4f}")

In [None]:
# Print scikit-learn's classification report for the intra-subject test folder.
detailed_evaluation(trained_model, INTRA_TEST_FOLDER, batch_size=8, preprocessing=preprocessing_pipeline)

# Cross testing

De inputs worden in de eerste lagen van het model gefilterd naar dezelfde soort features waarop je kan trainen. Lokale patronen vangen. Vervolgens LSTM of attention.

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, MultiHeadAttention, LayerNormalization, GlobalAveragePooling1D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

# Folders of the cross-subject split.
CROSS_TRAIN_FOLDER = os.path.join(DATA_PATH, "Cross/train")
CROSS1_TEST_FOLDER = os.path.join(DATA_PATH, "Cross/test1")
CROSS2_TEST_FOLDER = os.path.join(DATA_PATH, "Cross/test2")
CROSS3_TEST_FOLDER = os.path.join(DATA_PATH, "Cross/test3")
# A dedicated constant is used instead of reassigning DOWNSAMPLE_FACTOR, so
# anything configured with the intra-subject factor (such as the previously
# computed input_shape) stays consistent.
CROSS_DOWNSAMPLE_FACTOR = 0.5

# NOTE(review): min_val/max_val are read from the globals when the pipeline
# runs; at this point they hold the statistics learned from
# INTRA_TRAIN_FOLDER - confirm that scaling cross-subject data with them is
# intended.
preprocessing_pipeline_cross = [
    lambda x: scale_data(x, min_val, max_val),
    # Bind the factor now so later changes to the constant cannot alter this step.
    lambda x, factor=CROSS_DOWNSAMPLE_FACTOR: downsample(x, factor),
    add_gaussian_noise,
    channel_dropout,
]

def create_cross_model(input_shape, num_classes):
    """Build and compile a Conv1D + self-attention classifier.

    Three same-padded 1-D convolutions extract local features, a
    multi-head self-attention block with a residual connection and layer
    normalisation follows, and the result is average-pooled over the
    sequence axis before a dense classification head.

    Args:
        input_shape: shape of one (concatenated) input sample.
        num_classes: size of the softmax output.

    NOTE(review): the attention layer attends over every position of the
    convolution output; for very long sequences this may be memory-heavy -
    confirm it fits.
    """
    # Single input for the concatenated data.
    inputs = Input(shape=input_shape, name='concatenated_input')

    # Capture local features.
    features = inputs
    for n_filters, width in ((64, 5), (64, 3), (128, 3)):
        features = Conv1D(filters=n_filters, kernel_size=width, activation='relu', padding='same')(features)

    # Self-attention, added back onto its input and normalised.
    attended = MultiHeadAttention(num_heads=4, key_dim=64)(features, features)
    features = LayerNormalization()(features + attended)

    # Classification head.
    pooled = GlobalAveragePooling1D()(features)
    hidden = Dense(256, activation='relu')(pooled)
    hidden = Dropout(0.5)(hidden)
    outputs = Dense(num_classes, activation='softmax')(hidden)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def detailed_evaluation(model, folder, preprocessing=None, shuffle=False, batch_size=8):
    """Print scikit-learn's classification report for the files in ``folder``.

    Args:
        model: trained model exposing ``predict``.
        folder: directory to evaluate (streamed through ``create_batches``).
        preprocessing: optional list of callables applied to every file.
        shuffle: forwarded to ``create_batches``; defaults to a fixed order.
        batch_size: number of files per prediction batch.
    """
    y_true, y_pred = [], []

    # Predict batch by batch and reduce one-hot labels / class probabilities
    # to class indices, which is what classification_report expects.
    for batch_data, batch_label in create_batches(folder, batch_size=batch_size,
                                                  preprocessing=preprocessing,
                                                  shuffle=shuffle):
        y_true.extend(np.argmax(batch_label, axis=1))
        y_pred.extend(np.argmax(model.predict(batch_data), axis=1))
    print(classification_report(y_true, y_pred))

def train_cross_model(model, train_folder: str, epochs: int = 10, batch_size: int = 8, preprocessing: list = None, callbacks=None):
    """Train ``model`` by streaming batches from ``train_folder``.

    Each epoch re-reads the folder in a fixed file order, permutes the
    samples inside every batch and calls ``model.fit`` once per batch,
    forwarding ``callbacks`` to each call.

    NOTE(review): because ``fit`` is invoked separately for every batch,
    callbacks that track progress across epochs (e.g. early stopping) may
    not behave as expected - confirm.

    Returns:
        The same ``model`` object, after training.
    """
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        batch_stream = create_batches(train_folder, batch_size, preprocessing, shuffle=False)
        for features, targets in batch_stream:
            order = np.random.permutation(len(features))
            shuffled_x, shuffled_y = features[order], targets[order]
            model.fit(shuffled_x, shuffled_y, verbose = 0, callbacks = callbacks)
    return model

In [None]:
# Shape of one cross-subject sample after the cross preprocessing pipeline.
input_size = determine_input_shape(CROSS_TRAIN_FOLDER, preprocessing_pipeline_cross, min_val)
# NOTE(review): `minval`/`maxval` are not referenced anywhere else in the visible
# code; the cross pipeline's scale_data step reads `min_val`/`max_val` instead.
# Confirm which statistics are meant to be used.
minval, maxval = learn_minmax(CROSS_TRAIN_FOLDER)
# Early stopping
callback = EarlyStopping(monitor = 'loss', patience = 3)

# Build the Conv1D + attention model and train it on the cross-subject folder.
cross_model = create_cross_model(input_shape=input_size, num_classes=4)
cross_trained_model = train_cross_model(cross_model, CROSS_TRAIN_FOLDER, epochs=10, batch_size=8, preprocessing=preprocessing_pipeline_cross,
                                        callbacks=[callback])

2025-06-12 10:15:14.019372: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 1/10


2025-06-12 10:15:17.418236: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 141356032 exceeds 10% of free system memory.


ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(None, 4), output.shape=(None, 4)

In [None]:
# Actual training
# Train the baseline LSTM on the cross-subject data. The model is built with
# `input_size`, the shape measured with the cross pipeline, rather than the
# intra-subject `input_shape`, which was computed with a different pipeline.
model_cross = create_model(input_shape=input_size)
trained_model_cross = train_model(model_cross, CROSS_TRAIN_FOLDER, epochs = 10, batch_size = 8,
                                  preprocessing = preprocessing_pipeline_cross)

In [None]:
# Report per-class metrics on each cross-subject test folder. The folder
# constants are named CROSS<n>_TEST_FOLDER; shuffle is passed explicitly so
# the files are evaluated in a fixed order.
for title, test_folder in (
    ('Test1 folder', CROSS1_TEST_FOLDER),
    ('Test2 folder', CROSS2_TEST_FOLDER),
    ('Test3 folder', CROSS3_TEST_FOLDER),
):
    print(title)
    detailed_evaluation(trained_model_cross, test_folder, batch_size = 8,
                        preprocessing = preprocessing_pipeline_cross, shuffle = False)

# Code from Mehrkanoon on cross subject classification

In [None]:
import numpy as np
import mne  # MNE-Python toolbox

def preprocess_meg(raw_data, scale=1e5, mesh_shape=None):
    """Apply minimal preprocessing and allocate the sensor mesh.

    Args:
        raw_data: raw recording; multiplied by ``scale``.
        scale: normalisation factor (the raw values are very small; the
            default was reported as empirically best).
        mesh_shape: ``(rows, columns)`` of the mesh used by AA-CascadeNet and
            AA-MultiviewNet. When omitted, the module-level names ``N`` and
            ``L`` are used, which must then be defined by the caller's
            environment.

    Returns:
        ``(data, mesh)``: the scaled data and an all-zero mesh.

    NOTE(review): the mesh is only allocated; mapping the sensor values onto
    their scalp positions is not implemented here.
    """
    # 1. Normalise the data (its order of magnitude is very small).
    data = raw_data * scale

    # 2. Mesh representation (top-down view of the scalp): positions without
    #    a sensor stay 0.
    if mesh_shape is None:
        mesh_shape = (N, L)
    mesh = np.zeros(mesh_shape)

    return data, mesh

In [None]:
# Model architecture
from tensorflow.keras.layers import AveragePooling2D, BatchNormalization, Dense, Dropout, Flatten
from tensorflow.keras.layers import Concatenate, Conv2D, DepthwiseConv2D, Input, LSTM, SeparableConv2D
from tensorflow.keras.models import Model

def create_aa_eegnet(input_shape, num_classes):
    """Assemble the AA-EEGNet style model (uncompiled).

    Layout: attention-augmented temporal convolution -> depthwise spatial
    convolution -> separable convolution -> flatten -> global attention ->
    softmax classifier.

    Args:
        input_shape: shape of one input sample (without the batch axis).
        num_classes: size of the softmax output.

    Returns:
        A Keras ``Model``; compiling is left to the caller.
    """
    inputs = Input(shape=input_shape)

    # Block 1: temporal convolution with self-attention.
    x = AttentionAugmentedConv2D(filters=16, kernel_size=(1, 128),
                                 num_heads=2)(inputs)
    x = BatchNormalization()(x)

    # Block 2: spatial convolution. The (248, 1) kernel presumably spans all
    # sensor rows at once - TODO confirm against the input layout.
    x = DepthwiseConv2D(kernel_size=(248, 1), depth_multiplier=2)(x)
    x = BatchNormalization()(x)
    x = AveragePooling2D(pool_size=(1, 4))(x)
    x = Dropout(0.25)(x)

    # Block 3: separable convolution.
    x = SeparableConv2D(filters=32, kernel_size=(1, 16))(x)
    x = BatchNormalization()(x)
    x = AveragePooling2D(pool_size=(1, 8))(x)
    x = Dropout(0.25)(x)

    # Global attention before classification.
    x = Flatten()(x)
    # NOTE(review): no `GlobalAttentionBlock` is defined in the visible code
    # (a layer named `GlobalAttention` is); confirm which layer is meant and
    # that it accepts the flattened 2-D tensor.
    x = GlobalAttentionBlock()(x)

    # Output layer.
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

def create_aa_cascadenet(input_shape, num_classes):
    """Assemble the AA-CascadeNet style model (uncompiled) from W input streams.

    Every stream is passed through an attention-augmented convolution, two
    plain convolutions, a dense layer and an LSTM / global attention / LSTM
    stack; the per-stream results are concatenated and classified.

    NOTE(review): `W` is not defined inside this function; presumably a
    module-level constant giving the number of streams - TODO confirm.
    """
    # Multiple input tensors (W streams)
    input_layers = [Input(shape=input_shape) for _ in range(W)]
    
    processed_streams = []
    for inp in input_layers:
        # Spatial feature extraction with self-attention
        x = AttentionAugmentedConv2D(filters=1, kernel_size=(7,7))(inp)
        x = Conv2D(filters=2, kernel_size=(7,7))(x)
        x = Conv2D(filters=4, kernel_size=(7,7))(x)
        
        # Temporal feature extraction with LSTM + global attention
        # NOTE(review): Flatten followed by Dense gives a tensor without a
        # separate time axis, while recurrent layers normally expect
        # (batch, timesteps, features) - confirm this sequence builds.
        x = Flatten()(x)
        x = Dense(125)(x)
        x = LSTM(10, return_sequences=True)(x)
        x = GlobalAttention()(x)  # Applied between LSTM layers
        x = LSTM(10)(x)
        
        processed_streams.append(x)
    
    # Combine streams
    x = Concatenate()(processed_streams)
    x = Dense(125, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs=input_layers, outputs=outputs)

def create_aa_multiviewnet(spatial_input_shape, temporal_input_shape, num_classes):
    """Assemble the AA-MultiviewNet style model (uncompiled) with two inputs.

    A spatial stream (attention-augmented convolution plus two convolutions,
    flattened) and a temporal stream (dense, LSTM, global attention, LSTM)
    are concatenated and fed to a dense softmax classifier.

    Args:
        spatial_input_shape: shape of one sample for the spatial stream.
        temporal_input_shape: shape of one sample for the temporal stream.
        num_classes: size of the softmax output.
    """
    # Spatial stream (same as AA-CascadeNet but separate)
    spatial_input = Input(shape=spatial_input_shape)
    x_spatial = AttentionAugmentedConv2D(filters=1, kernel_size=(7,7))(spatial_input)
    x_spatial = Conv2D(filters=2, kernel_size=(7,7))(x_spatial)
    x_spatial = Conv2D(filters=4, kernel_size=(7,7))(x_spatial)
    x_spatial = Flatten()(x_spatial)
    
    # Temporal stream
    temporal_input = Input(shape=temporal_input_shape)
    x_temp = Dense(125)(temporal_input)
    x_temp = LSTM(10, return_sequences=True)(x_temp)
    # NOTE(review): confirm that the tensor returned by GlobalAttention still
    # has the (batch, timesteps, features) layout the following LSTM expects.
    x_temp = GlobalAttention()(x_temp)  # Applied to LSTM input sequence
    x_temp = LSTM(10)(x_temp)
    
    # Combine streams
    x = Concatenate()([x_spatial, x_temp])
    x = Dense(125, activation='relu')(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    
    return Model(inputs=[spatial_input, temporal_input], outputs=outputs)

In [None]:
# Attention mechanisms
# Multi-head Self-attention
from tensorflow.keras.layers import Layer, Softmax
import tensorflow as tf

class AttentionAugmentedConv2D(Layer):
    """2-D convolution concatenated with multi-head self-attention features.

    The layer runs a same-padded ``Conv2D`` and, in parallel, scaled
    dot-product self-attention over all ``H * W`` positions of the input.
    The attention output is projected back to the input's channel count and
    concatenated with the convolution output on the channel axis, so the
    result has ``filters + C`` channels.

    Each head owns its query/key/value projection, so the heads can learn
    different attention patterns.

    Args:
        filters: number of convolution filters.
        kernel_size: convolution kernel size.
        num_heads: number of attention heads.
    """

    def __init__(self, filters, kernel_size, num_heads=2, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.kernel_size = kernel_size
        self.num_heads = num_heads

    def build(self, input_shape):
        # Regular convolution path.
        self.conv = Conv2D(self.filters, self.kernel_size, padding='same')

        # Self-attention path: one Q/K/V projection per head.
        _, _, _, channels = input_shape
        # Keep at least one dimension per head even when C < num_heads.
        self.d_k = max(1, channels // self.num_heads)
        self.W_q = [Dense(self.d_k) for _ in range(self.num_heads)]
        self.W_k = [Dense(self.d_k) for _ in range(self.num_heads)]
        self.W_v = [Dense(self.d_k) for _ in range(self.num_heads)]
        self.W_o = Dense(channels)

    def call(self, inputs):
        # Convolutional path.
        conv_out = self.conv(inputs)

        # Self-attention path: flatten the spatial grid into one sequence.
        batch_size = tf.shape(inputs)[0]
        H, W, C = inputs.shape[1:]
        x_flat = tf.reshape(inputs, [batch_size, H * W, C])

        scale = tf.sqrt(tf.cast(self.d_k, tf.float32))
        heads = []
        for q_proj, k_proj, v_proj in zip(self.W_q, self.W_k, self.W_v):
            Q = q_proj(x_flat)
            K = k_proj(x_flat)
            V = v_proj(x_flat)

            # Scaled dot-product attention; the score matrix is
            # [batch, H*W, H*W], so memory grows quadratically with H*W.
            attn = tf.nn.softmax(tf.matmul(Q, K, transpose_b=True) / scale, axis=-1)
            heads.append(tf.matmul(attn, V))

        # Combine heads and project back to the input channel count.
        multi_head = tf.concat(heads, axis=-1)
        attn_out = self.W_o(multi_head)
        attn_out = tf.reshape(attn_out, [batch_size, H, W, C])

        # Combine conv and attention features on the channel axis.
        return tf.concat([conv_out, attn_out], axis=-1)

    def get_config(self):
        # Expose the constructor arguments so the layer can be re-created.
        config = super().get_config()
        config.update({
            "filters": self.filters,
            "kernel_size": self.kernel_size,
            "num_heads": self.num_heads,
        })
        return config

# Global attention (Luong-style)
class GlobalAttention(Layer):
    """Global (Luong-style) attention over a sequence of hidden states.

    The last time step is used as the query. Scores against all time steps
    are computed with a learned bilinear form, softmax-normalised over the
    sequence axis and used to build a context vector, which is combined
    with the query into an attentional hidden state.

    Input:  ``[batch, seq_len, features]``
    Output: ``[batch, features]``
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        features = input_shape[-1]
        # Bilinear scoring matrix.
        self.W_a = self.add_weight(shape=(features, features),
                                   initializer='glorot_uniform',
                                   trainable=True)
        # Maps [context; query] back to the feature size.
        self.W_c = self.add_weight(shape=(2 * features, features),
                                   initializer='glorot_uniform',
                                   trainable=True)

    def call(self, inputs):
        h_t = inputs[:, -1, :]  # last hidden state, [batch, features]
        h_s = inputs            # all hidden states, [batch, seq_len, features]

        # Attention scores of the query against every time step:
        # [batch, 1, features] x [batch, features, seq_len] -> [batch, 1, seq_len]
        scores = tf.matmul(h_t[:, None, :] @ self.W_a, h_s, transpose_b=True)
        a_t = tf.nn.softmax(scores, axis=-1)

        # Context vector = attention-weighted sum of the hidden states:
        # [batch, 1, seq_len] x [batch, seq_len, features] -> [batch, features]
        c_t = tf.squeeze(tf.matmul(a_t, h_s), axis=1)

        # Attentional hidden state.
        h_attn = tf.tanh(tf.matmul(tf.concat([c_t, h_t], axis=-1), self.W_c))

        return h_attn

In [None]:
# Training
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy

def train_model():
    """Train and evaluate AA-EEGNet on a subject-wise split.

    Subjects 1-12 are used for training and 13-18 for testing. The model is
    compiled with Adam (learning rate 1e-4) and categorical cross-entropy,
    fitted for 100 epochs with a 20 % validation split, and the test
    accuracy is printed.

    NOTE(review): ``load_data`` is not defined in the visible code.
    """
    train_subjects = list(range(1, 13))   # 12 subjects
    test_subjects = list(range(13, 19))   # 6 subjects
    X_train, y_train = load_data(subjects=train_subjects)
    X_test, y_test = load_data(subjects=test_subjects)

    model = create_aa_eegnet(input_shape=(248, 1425, 1), num_classes=4)
    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss=CategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    # Batch size 16 here; 64 is the setting noted for Cascade/Multiview.
    model.fit(
        X_train,
        y_train,
        batch_size=16,
        epochs=100,
        validation_split=0.2,
    )

    test_results = model.evaluate(X_test, y_test)
    print(f"Test accuracy: {test_results[1]:.2f}")

# Alternative approach from scratch

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from typing import Iterator

#########################################
# Helper Functions (Please customize!)
#########################################

def extract_data_from_folder(folder: str, shuffle: bool = False) -> Iterator[tuple[str, np.ndarray]]:
    """Lazily yield ``(filename, data)`` for every entry of ``folder``.

    Every entry is loaded with ``np.load``, i.e. the folder is expected to
    contain only NumPy files; adapt this to the real file format.

    Args:
        folder: directory to list.
        shuffle: when true, the listing is shuffled in place with NumPy's
            global RNG before iteration.
    """
    names = os.listdir(folder)
    if shuffle:
        np.random.shuffle(names)
    # Files are loaded one at a time, only as the consumer asks for them.
    yield from ((name, np.load(os.path.join(folder, name))) for name in names)

def generate_label(name: str) -> int:
    """Map a file name to an integer class label.

    Placeholder logic to be adapted to the real task: names containing
    ``task1`` map to 0, names containing ``task2`` (but not ``task1``) map
    to 1, and everything else falls back to 0.
    """
    is_second_task = "task1" not in name and "task2" in name
    return 1 if is_second_task else 0

def downsample(x: np.ndarray, factor: float) -> np.ndarray:
    """Downsample ``x`` along its first (time) axis.

    Args:
        x: 2-D array whose first axis is downsampled.
        factor: fraction of rows to keep, in ``(0, 1]``. ``1.0`` returns the
            original array unchanged.

    Returns:
        When ``1 / factor`` is a whole number the array is sliced with that
        stride. Otherwise ``int(len(x) * factor)`` rows are picked at evenly
        spaced (floored) positions, so factors such as 0.75 are honoured
        instead of being truncated to "keep everything".

    Raises:
        ValueError: if ``factor`` is outside ``(0, 1]``.
    """
    if factor == 1.0:
        return x
    if not 0.0 < factor < 1.0:
        raise ValueError(f"factor must be in (0, 1], got {factor}")

    stride = 1.0 / factor
    if stride.is_integer():
        # Simple slicing is exact for factors like 0.5 or 0.25.
        return x[::int(stride), :]

    new_length = int(x.shape[0] * factor)
    if new_length == 0:
        return x[:0]
    positions = np.floor(np.arange(new_length) * (x.shape[0] / new_length)).astype(int)
    return x[positions]

def learn_minmax(folder: str) -> tuple[float, float]:
    """Return the global minimum and maximum over all files of a folder.

    Raises:
        ValueError: if the folder yields no files.
    """
    extremes = [
        (data.min(), data.max())
        for _, data in extract_data_from_folder(folder, shuffle=False)
    ]
    lowest = min(low for low, _ in extremes)
    highest = max(high for _, high in extremes)
    return lowest, highest

def determine_input_shape(folder: str, preprocessing: list, min_val: float) -> tuple:
    """Return the shape of the first file after preprocessing.

    Only the first file yielded for ``folder`` is loaded. ``min_val`` is
    accepted but not used. Returns ``None`` when the folder yields no files.
    """
    first = next(iter(extract_data_from_folder(folder, shuffle=False)), None)
    if first is None:
        return None
    _, data = first
    for step in preprocessing:
        data = step(data)
    return data.shape

#########################################
# Batch Generator Function (given by you)
#########################################

def create_batches(folder: str, batch_size: int, preprocessing: list = None, shuffle: bool = False) -> Iterator[tuple[np.ndarray, np.ndarray]]:
    """Yield ``(data, labels)`` batches built from the files of a folder.

    Each file is passed through the ``preprocessing`` callables in order and
    labelled with ``generate_label``. A batch is emitted after every
    ``batch_size`` files; any remainder forms a final, smaller batch.
    """
    pending_data, pending_labels = [], []
    processed = 0
    for name, data in extract_data_from_folder(folder, shuffle):
        for step in (preprocessing or ()):
            data = step(data)
        pending_data.append(data)
        pending_labels.append(generate_label(name))
        processed += 1

        if processed % batch_size == 0:
            yield np.array(pending_data), np.array(pending_labels)
            pending_data, pending_labels = [], []
    if pending_data:
        yield np.array(pending_data), np.array(pending_labels)

#########################################
# PyTorch Dataset Wrapper
#########################################

class MEGDataset(Dataset):
    """In-memory dataset of preprocessed recordings and their labels.

    All files of ``folder`` are loaded and preprocessed eagerly in the
    constructor; items are returned as ``(float32 array, label)``.
    """

    def __init__(self, folder: str, preprocessing: list = None, shuffle: bool = False):
        self.data_list = []
        self.label_list = []
        steps = preprocessing if preprocessing else ()
        for name, data in extract_data_from_folder(folder, shuffle):
            for step in steps:
                data = step(data)
            self.data_list.append(data)
            self.label_list.append(generate_label(name))

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        # Cast on access so every sample comes out as float32.
        sample = self.data_list[idx].astype(np.float32)
        return sample, self.label_list[idx]

#########################################
# Model Definition: Shared Backbone + Adapter
#########################################

class MEGClassifier(nn.Module):
    """1-D convolutional feature extractor with an adapter and a linear head.

    Args:
        input_shape: ``(time, channels)`` of one sample; only the channel
            count is used.
        num_classes: number of output logits.
        adapter_dim: width of the adapter layer between backbone and head.
    """

    def __init__(self, input_shape: tuple, num_classes: int, adapter_dim: int = 64):
        super().__init__()
        n_channels = input_shape[1]
        backbone_layers = [
            nn.Conv1d(in_channels=n_channels, out_channels=32, kernel_size=5, stride=2, padding=2),
            nn.ReLU(),
            nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5, stride=2, padding=2),
            nn.ReLU(),
            # Collapse the time axis so the feature size does not depend on it.
            nn.AdaptiveAvgPool1d(1),
            nn.Flatten(),
        ]
        self.feature_extractor = nn.Sequential(*backbone_layers)
        # The last convolution has 64 output channels, which fixes the feature size.
        feature_dim = 64
        # Adapter intended to allow slight subject-specific adaptation.
        self.adapter = nn.Linear(feature_dim, adapter_dim)
        self.classifier = nn.Linear(adapter_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a ``(batch, time, channels)`` tensor to ``(batch, num_classes)`` logits."""
        # Conv1d wants (batch, channels, time).
        channels_first = x.transpose(1, 2)
        return self.classifier(self.adapter(self.feature_extractor(channels_first)))

#########################################
# Training and Evaluation Functions
#########################################

def train_model(model: nn.Module, dataloader: DataLoader, optimizer: optim.Optimizer, criterion: nn.Module, device: torch.device) -> float:
    """Run one training epoch and return the mean loss per sample.

    Args:
        model: network to train (switched to training mode).
        dataloader: yields ``(data, labels)`` batches; its ``dataset`` length
            is used to normalise the loss.
        optimizer: optimizer stepping ``model``'s parameters.
        criterion: loss taking ``(outputs, labels)``.
        device: device the batches are moved to.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the
        dataset size.
    """
    model.train()
    running_loss = 0.0
    for batch_data, batch_labels in dataloader:
        # as_tensor accepts tensors as well as arrays and avoids the
        # copy-construct warning torch.tensor() raises for tensor input.
        batch_data = torch.as_tensor(batch_data, device=device)
        batch_labels = torch.as_tensor(batch_labels, device=device, dtype=torch.long)
        optimizer.zero_grad()
        outputs = model(batch_data)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        # criterion returns a batch value; re-weight it by the batch size.
        running_loss += loss.item() * batch_data.size(0)
    return running_loss / len(dataloader.dataset)

def evaluate_model(model: nn.Module, dataloader: DataLoader, criterion: nn.Module, device: torch.device) -> tuple[float, float]:
    """Evaluate ``model`` and return ``(mean loss per sample, accuracy)``.

    Runs in evaluation mode without gradient tracking. Predictions are the
    argmax over dimension 1 of the model output.

    Args:
        model: network to evaluate.
        dataloader: yields ``(data, labels)`` batches; its ``dataset`` length
            is used for normalisation.
        criterion: loss taking ``(outputs, labels)``.
        device: device the batches are moved to.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    with torch.no_grad():
        for batch_data, batch_labels in dataloader:
            # as_tensor accepts tensors as well as arrays and avoids the
            # copy-construct warning torch.tensor() raises for tensor input.
            batch_data = torch.as_tensor(batch_data, device=device)
            batch_labels = torch.as_tensor(batch_labels, device=device, dtype=torch.long)
            outputs = model(batch_data)
            loss = criterion(outputs, batch_labels)
            running_loss += loss.item() * batch_data.size(0)
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == batch_labels).sum().item()
    total = len(dataloader.dataset)
    return running_loss / total, correct / total

#########################################
# Main Routine
#########################################

def main():
    """Train the PyTorch MEG classifier on the cross-subject split.

    Learns min/max on the training folder, determines the input shape,
    builds the model, loads both datasets into memory, trains for a fixed
    number of epochs while reporting test metrics after each one, and
    finally saves the model weights to ``meg_classifier.pth``.
    """
    # --- Paths and configuration ---
    data_root = "./data"
    train_dir = os.path.join(data_root, "Cross/train")
    test_dir = os.path.join(data_root, "Cross/test1")
    # Further test folders (Cross/test2, Cross/test3) can be added the same way.

    cross_factor = 1.0

    # Statistics over the training data (available for normalisation steps).
    min_val_cross, max_val_cross = learn_minmax(train_dir)

    # Add further preprocessing callables (e.g. normalisation) to this list.
    pipeline = [lambda x: downsample(x, cross_factor)]

    input_shape = determine_input_shape(train_dir, pipeline, min_val_cross)
    print(f"Determined input shape: {input_shape}")

    # --- Hyperparameters ---
    num_classes = 2  # adjust to the classification task
    batch_size = 16
    epochs = 10
    learning_rate = 1e-4

    # --- Device and model ---
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MEGClassifier(input_shape=input_shape, num_classes=num_classes)
    model.to(device)

    # --- Data ---
    train_loader = DataLoader(
        MEGDataset(train_dir, pipeline, shuffle=True),
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
    )
    test_loader = DataLoader(
        MEGDataset(test_dir, pipeline, shuffle=False),
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
    )

    # --- Optimisation setup ---
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # --- Training loop: one pass over the training data, then a test evaluation ---
    for epoch in range(epochs):
        train_loss = train_model(model, train_loader, optimizer, criterion, device)
        test_loss, test_accuracy = evaluate_model(model, test_loader, criterion, device)
        print(f"Epoch {epoch+1}/{epochs}: Train Loss: {train_loss:.4f} | Test Loss: {test_loss:.4f} | Test Acc: {test_accuracy:.4f}")

    # --- Persist the trained weights ---
    torch.save(model.state_dict(), "meg_classifier.pth")


if __name__ == '__main__':
    main()
