In [1]:
import os

import numpy as np

In [2]:
from src.utils import read_eeg_signal_from_file

# Directory with the preprocessed recordings; each file contributes one entry
# along axis 0 of `data` / `labels`.
data_path = 'data/data_preprocessed_python'

labels = []
data = []
baseline = []  # NOTE(review): never filled in this cell — confirm whether it is still needed

# os.listdir returns entries in arbitrary, platform-dependent order; sorting
# makes the participant axis reproducible between runs and machines.
for filename in sorted(os.listdir(data_path)):
    file_path = os.path.join(data_path, filename)
    trial = read_eeg_signal_from_file(file_path)
    labels.append(trial['labels'])
    data.append(trial['data'][:, :32, :])  # leave only eeg channels

# Stack the per-file arrays into single ndarrays.
labels = np.array(labels)
data = np.array(data)

In [3]:
# Sanity check: axis 0 has one entry per loaded file; axis 2 was cut to the
# first 32 channels when the files were read.
data.shape

(32, 40, 32, 8064)

In [4]:
# Sanity check: labels were appended in the same loop as `data`, so axis 0
# follows the same file order.
labels.shape

(32, 40, 4)

8064 points at 128 Hz -> 63 seconds of EEG readings [3 seconds of baseline, before stimuli]

Strategy for creating extracts:

Compute sample mean and variance on first 3 seconds (32 pairs of values) $\hat \mu_0, \hat s_0^2$

Drop 3 seconds after the baseline

Make windows of length 7 seconds over the remaining 57 seconds

Set the hop between windows to 1 second, giving 50 segments of 7 seconds each (128 x 7 = 896 points per segment)

For each segment $R_i$, the resulting training sample is $X_i = \frac{R_i - \hat \mu_0}{\hat s_0^2}$


In [5]:
from tqdm import tqdm
from src.fourier import psd_from_eeg

# Keyword arguments forwarded unchanged to psd_from_eeg for every trial.
psd_config = {
    'selected_channels': [1, 2, 3, 4, 6, 11, 13, 17, 19, 20, 21, 25, 29, 31],
    'freq_bands': {
        'Theta': [4, 8],
        'Alpha': [8, 12],
        'Low Beta': [12, 18],
        'High Beta': [18, 30],
        'Gamma': [30, 45],
    },
    # window_size equals step_size — presumably non-overlapping 5 s windows at
    # 128 Hz; confirm against psd_from_eeg.
    'window_size': 128 * 5,
    'step_size': 128 * 5,
    'sample_rate': 128,
    'two_dim': False,
}

# Samples 128*25 .. 128*40 of each trial, i.e. seconds 25-40 at 128 Hz.
analysis_span = slice(128 * 25, 128 * 40)

# One feature vector per (participant, trial): the per-window PSD outputs of
# the transposed (samples-first) readings are concatenated into one 1-D row.
X = np.array(
    [
        [
            np.concatenate(psd_from_eeg(trial_readings.T[analysis_span], **psd_config))
            for trial_readings in participant_trials
        ]
        for participant_trials in tqdm(data)
    ],
    dtype=np.float32,
)

100%|██████████| 32/32 [00:04<00:00,  7.14it/s]


In [6]:
X.shape

(32, 40, 210)

In [7]:
# Targets: the first two label columns for every (participant, trial) pair
# that has a feature vector in X (presumably valence and arousal — TODO confirm
# against the dataset description).
# A single slice replaces the element-by-element nested Python loops.
Y = labels[:X.shape[0], :X.shape[1], :2].astype(np.float32)

# Slicing truncates silently when `labels` is shorter than X, whereas indexing
# would have raised; keep that failure loud.
assert Y.shape[:2] == X.shape[:2], (
    f"labels {labels.shape} do not cover features {X.shape}"
)

In [8]:
# Show feature and target shapes, one per line.
print(X.shape, Y.shape, sep='\n')

(32, 40, 210)
(32, 40, 2)


In [14]:
# Merge every leading axis into one sample axis; the last axis (features for
# X, targets for Y) keeps its size.
n_features = X.shape[-1]
n_targets = Y.shape[-1]
_x = X.reshape(-1, n_features)
_y = Y.reshape(-1, n_targets)

In [15]:
from sklearn.model_selection import train_test_split

# Fixed seed so that Restart & Run All reproduces the same split, and so the
# saved dataset is identical between runs.
# NOTE(review): rows were flattened across participants before this call, so
# trials from the same participant end up in both train and validation sets.
X_train, X_val, Y_train, Y_val = train_test_split(
    _x, _y, test_size=0.2, random_state=42
)

In [16]:
# Show the train and validation feature shapes, one per line.
print(X_train.shape, X_val.shape, sep='\n')

(1024, 210)
(256, 210)


In [19]:
# Persist the split as one uncompressed .npz archive, keyed by array role.
split_arrays = {
    'train_x': X_train,
    'val_x': X_val,
    'train_y': Y_train,
    'val_y': Y_val,
}
with open('dataset.npz', 'wb') as archive:
    np.savez(archive, **split_arrays)