In [31]:
import kagglehub
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import os
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Download the SSVEP dataset through kagglehub and point `path_1` at the
# "SSVEP (BrainWheel)" sub-folder, which is the directory the loader scans
# for `subject_*` folders.
# An alternative dataset, "xuannguyenuet2004/12-class-ssvep-eeg-data", was
# tried first and was found to be bad, so it is no longer used.
path_1 = os.path.join(
    kagglehub.dataset_download("girgismicheal/steadystate-visual-evoked-potential-signals"),
    "SSVEP (BrainWheel)",  # joined with os.path.join instead of a hard-coded "/" separator
)
print("Download datasetaset files:", "\n", path_1)

Download datasetaset files: 
 /home/zeyadcode/.cache/kagglehub/datasets/girgismicheal/steadystate-visual-evoked-potential-signals/versions/1/SSVEP (BrainWheel)


In [29]:
# --- Dataset-specific constant (a property of the recordings, not a tunable) ---
# Number of consecutive CSV rows recorded under one stimulus frequency; after
# this many rows the frequency label in the last column changes.
TRIAL_LENGTH = 640

# --- Hyper-parameters ---
# Number of rows (time samples) in each window cut out of a trial.
WINDOW_LENGTH = 128
# Number of windows per mini-batch fed to the model.
BATCH_SIZE = 64

In [None]:
class EEGDataset(Dataset):
    """Fixed-length EEG windows cut from per-subject CSV recordings.

    Directory layout read by the loader: every sub-directory of ``data_path``
    whose name starts with ``subject_`` is scanned for ``*.csv`` files. In each
    file the first row is skipped, the last column is used as the label
    (the stimulus frequency) and all remaining columns are treated as channels.

    Every file is cut into consecutive trials of ``trial_length`` rows (trailing
    rows that do not fill a whole trial are ignored). Each trial must carry a
    single label and is sliced into windows of ``window_length`` rows, one
    window every ``stride`` rows.

    Attributes:
        data_path: the root directory that was scanned.
        data: float32 tensor of shape (num_windows, C, window_length), where C
            is the number of channel columns. The original author's note gives
            5200 x 14 x 128 for the notebook's dataset and settings.
        labels: 1-D tensor of shape (num_windows,) holding the label of the
            trial each window came from; its dtype follows the CSV label column.
    """

    def __init__(self, data_path, trial_length, window_length=128, stride=None) -> None:
        """Read every recording under ``data_path`` and build the window tensors.

        Args:
            data_path: directory containing the ``subject_*`` folders.
            trial_length: number of rows before the label (frequency) changes.
            window_length: number of rows per window; must divide ``trial_length``.
            stride: step in rows between the starts of consecutive windows.
                Defaults to ``window_length`` (non-overlapping windows).

        Raises:
            AssertionError: if ``window_length`` does not divide ``trial_length``
                or a trial contains more than one label value.
            ValueError: if ``stride`` is not positive.
        """
        super().__init__()

        assert trial_length % window_length == 0, "Please choose window_length that divides by trial_length"

        if stride is None:
            stride = window_length
        if stride <= 0:
            # A negative stride would silently produce an empty dataset and a
            # zero stride an obscure range() error, so reject both up front.
            raise ValueError(f"stride must be a positive integer, got {stride}")

        self.data_path = data_path

        windows = []
        labels = []

        # os.listdir returns entries in arbitrary order; sorting keeps the sample
        # order (and therefore any seeded split built on it) reproducible.
        # Non-directories that merely share the prefix are skipped.
        subject_dirs = sorted(
            d
            for d in os.listdir(data_path)
            if d.startswith("subject_") and os.path.isdir(os.path.join(data_path, d))
        )

        for subject_dir in subject_dirs:
            subject_path = os.path.join(data_path, subject_dir)
            sample_files = sorted(f for f in os.listdir(subject_path) if f.endswith(".csv"))

            for sample_file in sample_files:
                sample_file_path = os.path.join(subject_path, sample_file)
                df = pd.read_csv(sample_file_path, header=None, skiprows=1)  # rows x (channels + 1)

                freqs = df.iloc[:, -1].values
                # Convert the channel columns once per file rather than per window.
                signals = df.iloc[:, :-1].to_numpy(dtype=np.float32)  # rows x C

                n_trials = len(freqs) // trial_length  # incomplete trailing trial is dropped
                for t in range(n_trials):
                    start = t * trial_length
                    end = start + trial_length
                    block_freqs = freqs[start:end]

                    assert np.all(block_freqs == block_freqs[0]), f"Mixed labels in trial {t} of {sample_file}"

                    trial_label = block_freqs[0]
                    trial_data = signals[start:end]  # trial_length x C

                    for i in range(0, trial_length - window_length + 1, stride):
                        # Transpose so every sample is laid out as C x window_length.
                        windows.append(trial_data[i: i + window_length].T)
                        labels.append(trial_label)

        self.data = torch.tensor(np.array(windows))    # num_windows x C x window_length
        self.labels = torch.tensor(np.array(labels))   # num_windows

    def __getitem__(self, idx):
        """Return the ``(window, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

    def __len__(self):
        """Return the number of windows in the dataset."""
        return len(self.data)


# Build the windowed dataset from the downloaded recordings. The stride equals
# the window length, so consecutive windows within a trial do not overlap.
dataset = EEGDataset(
    data_path=path_1,
    trial_length=TRIAL_LENGTH,
    window_length=WINDOW_LENGTH,
    stride=WINDOW_LENGTH,
)

In [33]:
# Pull the windowed data back out as NumPy arrays for scikit-learn's splitter.
X, Y = dataset.data.numpy(), dataset.labels.numpy()

# Hold out 20% of the windows as the test set, then 20% of the remainder as the
# validation set (64% / 16% / 20% overall). Both splits are stratified by label
# and use a fixed seed.
# NOTE(review): windows are shuffled individually, so windows cut from the same
# trial can end up in different splits — confirm this is acceptable for evaluation.
X_train_val, X_test, Y_train_val, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train_val, Y_train_val, test_size=0.2, random_state=42, stratify=Y_train_val
)

# Features become float32 tensors, labels become int64 tensors.
# NOTE(review): .long() truncates fractional values — verify the CSV labels are integral.
X_train_t, X_val_t, X_test_t = (
    torch.from_numpy(arr).float() for arr in (X_train, X_val, X_test)
)
Y_train_t, Y_val_t, Y_test_t = (
    torch.from_numpy(arr).long() for arr in (Y_train, Y_val, Y_test)
)

# Wrap each split as a dataset of (window, label) pairs.
train_ds = TensorDataset(X_train_t, Y_train_t)
val_ds = TensorDataset(X_val_t, Y_val_t)
test_ds = TensorDataset(X_test_t, Y_test_t)

# Displayed as the cell output: number of windows per split.
len(train_ds), len(val_ds), len(test_ds)

(3328, 832, 1040)

In [34]:
# Mini-batch iterators for the three splits.
# Only the training loader reshuffles and drops a trailing partial batch;
# validation and test are iterated in a fixed order and keep every sample.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)