# Data Processing

This notebook preprocesses the EEG data and wraps it in PyTorch `DataLoader`s that will feed a 1D CNN model.

## Step 0: Preprocessing

In [27]:
import pandas as pd

In [34]:
# Collapse the five-class target into a binary one: label 1 is kept as-is and
# labels 2-5 are merged into label 0.
DATA_FP = "data.csv"
df = pd.read_csv(DATA_FP)

# Class counts before relabelling.
print(df["y"].value_counts())

# Every label in 2..5 is mapped to 0; all other values are left untouched.
df["y"] = df["y"].replace({label: 0 for label in (2, 3, 4, 5)})

# Class counts after relabelling.
print(df["y"].value_counts())

# index=False keeps the pandas row index out of the written file.
df.to_csv("data_preprocessed.csv", index=False)

y
4    2300
1    2300
5    2300
2    2300
3    2300
Name: count, dtype: int64
y
0    9200
1    2300
Name: count, dtype: int64


## Step 1: DataLoader

In [40]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, random_split

In [37]:
class CustomEEGDataset(Dataset):
    """Map-style dataset that serves rows of a CSV file as (signal, label) pairs.

    The CSV is read once at construction time. Column 0 is skipped, the last
    column is used as the label, and every column in between is treated as one
    value of the signal.
    """

    def __init__(self, annotations_file):
        """Read the CSV and cache signals and labels as NumPy arrays.

        Args:
            annotations_file: Path or file-like object, forwarded unchanged to
                ``pandas.read_csv``.
        """
        frame = pd.read_csv(annotations_file)

        # Column 0 is excluded from the features (presumably a record
        # identifier - confirm against the CSV); the last column is the target.
        signals = frame.iloc[:, 1:-1]
        labels = frame.iloc[:, -1]

        self.X = signals.to_numpy(dtype=np.float32)
        self.y = labels.astype(np.int64).to_numpy()

    def __len__(self):
        """Return the number of rows (samples) held by the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a ``(signal, label)`` pair.

        Args:
            idx: Integer position, or a single-element tensor holding one.

        Returns:
            A tuple ``(signal, label)``. For an integer ``idx``, ``signal`` is
            a float32 tensor of shape ``(1, n_features)`` and ``label`` is a
            0-dim int64 tensor.
        """
        position = idx.item() if torch.is_tensor(idx) else idx

        # from_numpy shares memory with self.X; indexing with None prepends
        # one dimension to the row without copying it.
        signal = torch.from_numpy(self.X[position])[None]
        label = torch.as_tensor(self.y[position])

        return signal, label


In [38]:
# Point at the relabelled CSV and build the dataset from it.
DATA_FP = "data_preprocessed.csv"
eeg_dataset = CustomEEGDataset(DATA_FP)

# Smoke test: fetch the first (signal, label) pair.
x, y = eeg_dataset[0]

In [39]:
# Display the first sample's signal tensor and label to eyeball the values.
x, y

(tensor([[ 135.,  190.,  229.,  223.,  192.,  125.,   55.,   -9.,  -33.,  -38.,
           -10.,   35.,   64.,  113.,  152.,  164.,  127.,   50.,  -47., -121.,
          -138., -125., -101.,  -50.,   11.,   39.,   24.,   48.,   64.,   46.,
            13.,  -19.,  -61.,  -96., -130., -132., -116., -115.,  -71.,  -14.,
            25.,   19.,    6.,    9.,   21.,   13.,  -37.,  -58.,  -33.,    5.,
            47.,   80.,  101.,   88.,   73.,   69.,   41.,  -13.,  -31.,  -61.,
           -80.,  -77.,  -66.,  -43.,    5.,   87.,  129.,  121.,   88.,   12.,
           -76., -150., -207., -186., -165., -148., -103.,  -33.,   40.,   94.,
            75.,    8.,  -81., -155., -227., -262., -233., -218., -187., -126.,
           -65.,  -12.,   27.,   61.,   49.,    9.,  -46., -124., -210., -281.,
          -265., -181.,  -89.,   -4.,   53.,   53.,   38.,   43.,   31.,   34.,
             9.,   -7.,  -34.,  -70.,  -84., -101.,  -70.,  -11.,   42.,   62.,
            66.,   74.,   64.,   59.,   

In [42]:
# Hold out 20% of the samples for testing.
# The split is driven by an explicitly seeded generator so that
# "Restart Kernel -> Run All" reproduces the same train/test partition; an
# unseeded random_split draws a different partition on every run, which makes
# results impossible to compare between runs.
# NOTE: random_split does not stratify, so the class ratio of the two subsets
# can differ slightly from that of the full dataset.
SPLIT_SEED = 42
train_dataset, test_dataset = random_split(
    eeg_dataset,
    [0.8, 0.2],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_dataset, test_dataset

(<torch.utils.data.dataset.Subset at 0x21f3b7c9050>,
 <torch.utils.data.dataset.Subset at 0x21f3d1c9590>)

In [43]:
# Dataloaders
# One batch size shared by both loaders instead of a repeated literal.
BATCH_SIZE = 64

# Training batches are reshuffled at the start of every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Evaluation gains nothing from shuffling; a fixed order keeps per-sample
# predictions aligned with test_dataset and makes evaluation runs repeatable.
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
