# File to experiment with different machine learning models.

In [1]:
from lib.DataObject import DataObject
import lib.DataObjectUtils as util
import torch
import pickle
import torch.nn as nn
from lib.DataHandler import DataAcquisitionHandler
from torch.utils.data import DataLoader, TensorDataset

pygame 2.5.1 (SDL 2.28.2, Python 3.11.5)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# load data from data/* using pickle
filename = 'data/handler_box_data_full_Oct_30_2023.pkl'
with open(filename, 'rb') as f:
    handler = pickle.load(f)

In [4]:
# Data filter visitor test

data = DataObject(handler.get_data())

data.accept(util.BandpassFilterVisitor(low=0.1, high=10))
data.accept(util.BandstopFilterVisitor(low=49, high=51))

key_data, box_data = data.get_data(decorator=util.MakeTensorWindowsDataDecorator())

sample = box_data[0]

print("List of samples:" ,len(box_data))
print("Sample - (channels, label):", len(box_data[0]))
print("Channels:", len(box_data[0][0]))
max_channel_len = 0
min_channel_len = 10000000
for sample in box_data:
    for channel in sample[0]:
        if len(channel) > max_channel_len:
            max_channel_len = len(channel)
        if len(channel) < min_channel_len:
            min_channel_len = len(channel)
print("Max Channel Len - [reading_1, ...]:", max_channel_len)
print("Min Channel Len - [reading_1, ...]:", min_channel_len)
print("Sample example:", sample)

List of samples: 83
Sample - (channels, label): 2
Channels: 24
Channel - [reading_1, ...]: 250
Max Channel Len - [reading_1, ...]: 250
Min Channel Len - [reading_1, ...]: 250
Sample example: (tensor([[ 2.4500e+02,  2.4600e+02,  2.4700e+02,  ...,  2.3600e+02,
          2.3700e+02,  2.3800e+02],
        [ 8.3703e+03,  8.4933e+03,  8.5348e+03,  ...,  8.4436e+03,
          8.5442e+03,  8.3925e+03],
        [-6.4812e+00, -6.1533e+00, -5.2448e+00,  ..., -3.8378e+00,
         -3.7535e+00, -3.5997e+00],
        ...,
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [ 1.6987e+09,  1.6987e+09,  1.6987e+09,  ...,  1.6987e+09,
          1.6987e+09,  1.6987e+09],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00]]), tensor(1))


In [57]:
# Dataset

class EEGDataset(torch.utils.data.Dataset):

    def __init__(self, data):
        self.data, self.labels = self.parse_data(data)
        self.window_size = self.data.shape[2]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.labels[index]
    
    def parse_data(self, data):
        """
        Data comes in the form of a list of tuples (data, label)
        data is a 2D tensor (channels, readings)
        label is a 1D tensor (1)

        return:
        data: 3D tensor (samples, channels, readings)
        label: 1D tensor (samples)
        """

        data_list = []
        label_list = []

        channels_idx = (1, 9)

        for sample in data:
            data_list.append(sample[0][channels_idx[0]:channels_idx[1]])
            label_list.append(sample[1])

        return torch.stack(data_list), torch.stack(label_list)

In [58]:
# Data loader

dataset = EEGDataset(box_data)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Check if data is loaded correctly
print("Train dataset size:", len(train_dataset))
print("Test dataset size:", len(test_dataset))
print("Train loader size:", len(train_loader))
print("Test loader size:", len(test_loader))
print("Sample data shape:", len(dataset))

Train dataset size: 66
Test dataset size: 17
Train loader size: 3
Test loader size: 1
Sample data shape: 83


In [63]:
# create p300Model
class EEGNet(torch.nn.Module):
    """
    Pytorch implementation of EEGNet

    Expecting input of shape (batch_size, channels, readings)
    input = [1, 32, 8, 250]
    batch_size: number of samples in a batch
    channels: number of channels in a sample (8)
    readings: number of readings in a channel (len())
    """

    def __init__(self):
        super(EEGNet, self).__init__()

        # block 1 nn.sequential
        self.block1 = torch.nn.Sequential(
            # Conv2D
            nn.Conv2d(8, 32, (1, 50), padding=0, bias=False),
            
            # Batch norm
            nn.BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),

            # DepthwiseConv2D
            nn.Conv2d(32, 32, (2, 1), groups=16, bias=False),

            # Batch norm
            nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),

            # ELU Activation
            nn.ELU(alpha=1.0),

            # Avg Pooling 2D
            nn.AvgPool2d(kernel_size=(1, 4), stride=(1, 4), padding=0),

            # Spacial Dropout 2D
            nn.Dropout2d(p=0.15)
        )

        # block 2 nn.sequential
        self.block2 = torch.nn.Sequential(
            # Separable Conv2D
            nn.Conv2d(32, 32, (1, 15), padding=0, bias=False),

            # Batch norm
            nn.BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),

            # ELU Activation
            nn.ELU(alpha=1.0),

            # Avg Pooling 2D
            nn.AvgPool2d(kernel_size=(1, 8), stride=(1, 8), padding=0),

            # Spacial Dropout 2D
            nn.Dropout2d(p=0.15)
        )

        # Fully Connected Layer
        self.fc = nn.Linear(in_features=736, out_features=2, bias=True)

    def forward(self, x):
        print(x.shape)

        # block 1
        x = self.block1(x)

        # block 2
        x = self.block2(x)

        # flatten
        x = x.view(x.shape[0], -1)

        # fc
        x = self.fc(x)

        return x

In [64]:
# initialize model
model = EEGNet()

# loss function
loss_fn = torch.nn.CrossEntropyLoss()

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [61]:
# training loop

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        batch_size = X.shape[1]
        # compute prediction and loss
        # get rid of the extra dimension
        # X = torch.resize(X, (batch_size, 8, 250))
        print(X.shape)
        pred = model(X)
        loss = loss_fn(pred, y)

        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f'loss: {loss:>7f} [{current:>5d}/{size:>5d}]')

In [65]:
train(train_loader, model, loss_fn, optimizer)

torch.Size([32, 8, 250])
torch.Size([32, 8, 250])


RuntimeError: Given groups=1, weight of size [32, 8, 1, 50], expected input[1, 32, 8, 250] to have 8 channels, but got 32 channels instead