In [5]:
import os
import h5py
import numpy as np

In [26]:
def load_data_from_folder(folder_path):
    """Load every labelled ``.h5`` file found directly inside ``folder_path``.

    The class label is inferred from a keyword contained in the file name,
    checked in this order: ``rest`` -> 0, ``math`` or ``story`` -> 1,
    ``memory`` -> 2, ``motor`` -> 3. Files matching none of them are skipped.

    The HDF5 dataset name is the file name with its last separator-delimited
    token (the one carrying the ``.h5`` extension) removed. A space separator
    takes precedence over an underscore; files whose name contains neither
    are skipped.

    Files are visited in sorted name order: ``os.listdir`` returns entries in
    arbitrary order, which would make the sample order differ between runs
    and machines.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    data : numpy.ndarray
        ``np.array`` built from the list of loaded matrices, one per file.
    labels : numpy.ndarray
        Integer label of each loaded file, aligned with ``data``.

    Raises
    ------
    KeyError
        If a file does not contain the dataset its name implies.
    """
    # Order matters: the first keyword group found in the name wins.
    keyword_labels = (
        (('rest',), 0),
        (('math', 'story'), 1),
        (('memory',), 2),
        (('motor',), 3),
    )

    data = []
    labels = []
    for fname in sorted(os.listdir(folder_path)):
        if not fname.endswith('.h5'):
            continue

        label = next(
            (value for words, value in keyword_labels
             if any(word in fname for word in words)),
            None,
        )
        if label is None:
            continue

        # Drop the trailing token (e.g. "<chunk>.h5") to get the dataset name.
        if ' ' in fname:
            dataset_name = fname.rsplit(' ', 1)[0]
        elif '_' in fname:
            dataset_name = fname.rsplit('_', 1)[0]
        else:
            continue

        file_path = os.path.join(folder_path, fname)
        with h5py.File(file_path, 'r') as f:
            if dataset_name not in f:
                raise KeyError(
                    f"dataset {dataset_name!r} not found in {file_path!r}; "
                    f"available: {list(f.keys())}"
                )
            # Copy into memory before the file is closed.
            data.append(np.array(f[dataset_name]))
        labels.append(label)
    return np.array(data), np.array(labels)

In [29]:
# Folder layout under base_path: "train" plus "test1" .. "test3".
base_path = "../dataset/Final Project data/Cross/"
train_folder = os.path.join(base_path, "train")
test_folders = [os.path.join(base_path, "test" + str(idx)) for idx in (1, 2, 3)]

X_train, y_train = load_data_from_folder(train_folder)

# Load each test folder separately, then stack them along the sample axis.
loaded_test_parts = [load_data_from_folder(folder) for folder in test_folders]
X_test_list = [features for features, _ in loaded_test_parts]
y_test_list = [targets for _, targets in loaded_test_parts]
X_test = np.concatenate(X_test_list, axis=0)
y_test = np.concatenate(y_test_list, axis=0)

print("Train data shape:", X_train.shape, y_train.shape)
print("Test data shape:", X_test.shape, y_test.shape)

Train data shape: (64, 248, 35624) (64,)
Test data shape: (48, 248, 35624) (48,)


In [31]:
def zscore_normalize(X):
    """Standardise ``X`` along axis 2.

    For every position along the other axes, the values along axis 2 are
    shifted by their mean and divided by their standard deviation. A small
    constant (1e-8) is added to the standard deviation so constant rows
    produce zeros instead of a division by zero.

    Returns a new array with the same shape as ``X``.
    """
    mu = np.mean(X, axis=2, keepdims=True)
    sigma = np.std(X, axis=2, keepdims=True)
    centered = np.subtract(X, mu)
    return np.divide(centered, sigma + 1e-8)

def downsample(X, factor=8):
    """Keep every ``factor``-th element along axis 2 of a 3-D array.

    This is plain strided slicing: no filtering is applied before samples
    are dropped. The first two axes are returned unchanged.
    """
    keep_all = slice(None)
    every_nth = slice(None, None, factor)
    return X[keep_all, keep_all, every_nth]

In [32]:
# Standardise each split on its own; statistics are taken along axis 2.
X_train_norm, X_test_norm = (
    zscore_normalize(split) for split in (X_train, X_test)
)

#TODO: ask the TA if it's needed or not
X_train_ds, X_test_ds = (
    downsample(split, factor=8) for split in (X_train_norm, X_test_norm)
)

# Append a trailing singleton axis so Conv2D sees a one-channel input.
X_train_final = np.expand_dims(X_train_ds, axis=-1)
X_test_final = np.expand_dims(X_test_ds, axis=-1)

print("Train shape ready for CNN:", X_train_final.shape)
print("Test shape ready for CNN:", X_test_final.shape)

Train shape ready for CNN: (64, 248, 4453, 1)
Test shape ready for CNN: (48, 248, 4453, 1)


In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Input

# Labels produced by load_data_from_folder are 0..3, hence four output units.
num_classes = 4

# Two Conv2D -> MaxPooling2D -> Dropout stages followed by a dense classifier.
# The input shape is declared with an explicit Input layer rather than the
# `input_shape=` keyword on the first layer.
#
# NOTE(review): for a (248, 4453, 1) input the Flatten layer yields
# 60 * 1111 * 64 = 4,266,240 features, so Dense(128) alone holds ~546M
# weights. Consider stronger pooling / strides or a global pooling layer
# before the dense head if training is too slow or memory-hungry.
model = Sequential([
    Input(shape=X_train_final.shape[1:]),

    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Sparse loss: y holds integer class ids, not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)


In [36]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping (with restore_best_weights) picks the epoch whose weights are
# kept, so it must not monitor the test set: doing so selects the model on the
# very data used to report performance. Hold out a stratified, seeded slice of
# the training set for validation instead, and keep X_test_final / y_test
# untouched for the final evaluation.
VAL_FRACTION = 0.2
SPLIT_SEED = 42

split_rng = np.random.default_rng(SPLIT_SEED)
val_parts = []
for cls in np.unique(y_train):
    cls_idx = split_rng.permutation(np.flatnonzero(y_train == cls))
    # At least one validation sample per class, but never the whole class.
    n_val = min(len(cls_idx) - 1, max(1, int(round(VAL_FRACTION * len(cls_idx)))))
    val_parts.append(cls_idx[:n_val])
val_idx = np.sort(np.concatenate(val_parts))

fit_mask = np.ones(len(y_train), dtype=bool)
fit_mask[val_idx] = False

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

history = model.fit(
    X_train_final[fit_mask], y_train[fit_mask],
    batch_size=16,
    epochs=30,
    validation_data=(X_train_final[val_idx], y_train[val_idx]),
    callbacks=[early_stop],
    verbose=2
)

Epoch 1/30


KeyboardInterrupt: 