In [7]:
import re
import os
import pandas as pd
import numpy as np

import sys
sys.path.append(os.path.join(os.path.abspath(''), '..'))
import utils.ssvep_analysis as sa

In [8]:
def read_data(file_path):
    df = pd.read_csv(file_path, skiprows=10, header=None)
    with open(file_path, 'r') as file:
        content = file.read()
    match = re.search(r'Severity:\s*(\d+(\.\d+)?)', content)
    severity = float(match.group(1))
    match = re.search(r'Subject:\s*(\S+)', content)
    subject = str(match.group(1))
    match = re.search(r'Series:\s*(\S+)', content)
    series = str(match.group(1))
    marker = df.to_numpy()[:, -1]
    eeg_data = df.to_numpy()[:, 1:9].astype(float)
    return eeg_data, marker, severity, subject, series

def preprocess_data(eeg_data, marker):
    eeg_data = sa.remove_artefacts(eeg_data)
    eeg_data = sa.apply_ransac_detrending(eeg_data,)
    eeg_data = sa.apply_bandpass_filter(eeg_data, lowcut=14, highcut=35)
    eeg_data = sa.apply_notch_filter(eeg_data)
    eeg_data, _ = sa.compute_reduced_signal(eeg_data)

    start = np.argwhere(marker != 'grey')[0][0]
    end = np.argwhere(marker != 'grey')[-1][0]
    return eeg_data[start:end]

def build_dataset(directory_list):
    X = []
    y = []
    groups = []
    severities = []
    max_len = 0
    for directory in directory_list:
        for file in os.listdir('../data/' + directory):
            file_path = '../data/' + directory + '/' + file
            eeg_data, marker, severity, subject, series = read_data(file_path)
            if series != 'multicolor-deuteranomaly-series' or eeg_data.shape[0] < 7000:
                continue
            eeg_data = preprocess_data(eeg_data, marker)
            X.append(eeg_data)
            y.append(0 if severity == 0 else 1)
            groups.append(subject)
            severities.append(severity)
            max_len = max(max_len, eeg_data.shape[0])
    return X, np.array(y), np.array(groups), np.array(severities), max_len

def zero_pad(X, max_len):
    for i in range(len(X)):
        if len(X[i]) < max_len:
            X[i] = np.concatenate((X[i], np.zeros(max_len - len(X[i]))))
        else:
            X[i] = X[i][:max_len]
    return np.array(X)


directory_list_train = ['2024-01-12', '2024-01-22', '2024-03-04-Schroedi', '2024-03-08-Schwein', '2024-03-14-Daddy', '2024-03-08-Happy', '2024-03-23-Maracuja', '2024-03-07-Tomate', '2024-03-07-Badewanne']
X_train, y_train, groups_train, severities_train, max_len_train = build_dataset(directory_list_train)

directory_list_test = ['2024-01-19', '2024-03-25-Lillifee', '2024-03-04-Gurke']
X_test, y_test, groups_test, severities_test, max_len_test = build_dataset(directory_list_test)

directory_list_cvd = ['2024-03-06-Jens', '2024-03-15-Wert', '2024-03-15-Fisch']
X_cvd, y_cvd, groups_cvd, severities_cvd, max_len_cvd = build_dataset(directory_list_cvd)

max_len = max(max_len_train, max_len_test, max_len_cvd)
X_train = zero_pad(X_train, max_len)
X_test = zero_pad(X_test, max_len)
X_cvd = zero_pad(X_cvd, max_len)

print('Train:', X_test.shape, y_test.shape)
print('Test:', X_train.shape, y_train.shape)
print('CVD:', X_cvd.shape, y_cvd.shape)

Train: (86, 7213) (86,)
Test: (254, 7213) (254,)
CVD: (69, 7213) (69,)


In [9]:
np.save('../data/X_train.npy', X_train)
np.save('../data/y_train.npy', y_train)
np.save('../data/groups_train.npy', groups_train)
np.save('../data/severities_train.npy', severities_train)

np.save('../data/X_test.npy', X_test)
np.save('../data/y_test.npy', y_test)
np.save('../data/groups_test.npy', groups_test)
np.save('../data/severities_test.npy', severities_test)

np.save('../data/X_cvd.npy', X_cvd)
np.save('../data/y_cvd.npy', y_cvd)
np.save('../data/groups_cvd.npy', groups_cvd)
np.save('../data/severities_cvd.npy', severities_cvd)