Fz: EXG Channel 2, blue

Cz: EXG Channel 7, brown

P3: EXG Channel 6, red

Pz: EXG Channel 5, orange

P4: EXG Channel 4, yellow

Column indices in the CSV file (in the order Fz, Cz, P3, Pz, P4): 3, 8, 7, 6, 5

The total time for each experiment is 60 seconds. Each white image (target) or black image (non-target) appears for 0.5 seconds.

Sampling rate: 250 Hz

# Data preprocessing

In [151]:
# Duration of one experiment, in seconds
TOTAL_TIME = 60
# Time each white/black image stays on screen, in seconds
IMAGE_TIME = 0.5

In [152]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [153]:
# Mount Google Drive so the files under /content/drive/ can be opened by the cells below
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [154]:
# read CSV file (data)
from datetime import timezone

file_path_csv = '/content/drive/My Drive/Colab Notebooks/ece496_p300/BrainFlow-RAW_2023-11-02_16-27-02_67.csv'
df = pd.read_csv(file_path_csv, sep='\t', header=None)

# select columns of 5 electrodes
Fz_column = df.iloc[:, 3]
Cz_column = df.iloc[:, 8]
P3_column = df.iloc[:, 7]
Pz_column = df.iloc[:, 6]
P4_column = df.iloc[:, 5]

Fz_array = Fz_column.to_numpy()
Cz_array = Cz_column.to_numpy()
P3_array = P3_column.to_numpy()
Pz_array = Pz_column.to_numpy()
P4_array = P4_column.to_numpy()

# one row per sample, one column per electrode (Fz, Cz, P3, Pz, P4)
x_array = np.column_stack((Fz_array, Cz_array, P3_array, Pz_array, P4_array))

# select column of timestamp and convert unix timestamp to formatted timestamp
timestamp_column = df.iloc[:, -2]
timestamp_array = timestamp_column.to_numpy()

# The timestamps are shifted to UTC-4. Converting with an explicit fixed offset
# (instead of "local time minus 4 hours") gives the same wall-clock values on a
# UTC kernel and stays correct when the kernel runs in any other time zone.
# The tzinfo is dropped afterwards so the values remain naive datetimes.
RECORDING_TZ = timezone(timedelta(hours=-4))
formatted_timestamp_array = [
    datetime.fromtimestamp(unix_timestamp, tz=RECORDING_TZ).replace(tzinfo=None)
    for unix_timestamp in timestamp_array
]

In [155]:
#print(x_array.shape)
#print(x_array[0])

In [156]:
# read TXT file (ground truth)
with open('/content/drive/My Drive/Colab Notebooks/ece496_p300/white_time_1.txt', 'r') as file:
    lines = file.readlines()

# initialize lists to store timestamps
start_timestamp = None
white_timestamps = []

# process each line
for line in lines:
  if "Recording start at:" in line:
    # extract and convert the absolute start timestamp
    start_timestamp_str = line.split(": ", 1)[1].strip()
    start_timestamp = datetime.fromisoformat(start_timestamp_str)
  elif "White image shown at:" in line:
    # extract and convert the relative white image timestamps (H:M:S, seconds may be fractional)
    white_timestamp_str = line.split(": ", 1)[1].strip()
    hours, minutes, seconds = map(float, white_timestamp_str.split(':'))
    white_timestamps.append(timedelta(hours=hours, minutes=minutes, seconds=seconds))

# fail with a clear message instead of a "None + timedelta" TypeError below
if start_timestamp is None:
  raise ValueError("ground-truth file has no 'Recording start at:' line")

absolute_white_timestamps = [start_timestamp + delta for delta in white_timestamps]

# compute relative white time in seconds
# total_seconds() also accounts for the `days` field, which `.seconds` alone ignores
white_t = [delta.total_seconds() for delta in white_timestamps]

In [157]:
#white_timestamps

In [158]:
#white_t

In [159]:
# label data (0: non-target, 1: target)
# Since total time is 60s and each image is 0.5s, there are 120 labels

n_labels = int(TOTAL_TIME / IMAGE_TIME)
label = [0] * n_labels
for t in white_t:
  # NOTE(review): the "+ 1" shifts every target one image slot later than
  # round(t / IMAGE_TIME) -- confirm this offset is intended.
  slot = round(t / IMAGE_TIME) + 1
  # A negative slot would silently wrap around to the end of the list and a
  # slot past the end raises a bare IndexError; report both explicitly.
  if not 0 <= slot < n_labels:
    raise IndexError(
        f"white image at t={t}s maps to label slot {slot}, outside 0..{n_labels - 1}"
    )
  label[slot] = 1

In [160]:
#len(label)

In [161]:
# Segment data into one fixed-length window per label
# dataset 1: start from line 1339, timestamp=1698959840.671408 to match start time of white/black image
SAMPLING_RATE_HZ = 250
SAMPLES_PER_IMAGE = int(round(SAMPLING_RATE_HZ * IMAGE_TIME))  # 125 samples per 0.5 s image
FIRST_SAMPLE_ROW = 1339

# Slicing past the end of x_array would silently produce short or empty
# segments, so check up front that the recording covers every label.
rows_needed = FIRST_SAMPLE_ROW + len(label) * SAMPLES_PER_IMAGE
if len(x_array) < rows_needed:
  raise ValueError(
      f"recording has {len(x_array)} rows but {rows_needed} are needed "
      f"for {len(label)} segments of {SAMPLES_PER_IMAGE} samples"
  )

data = []

index = FIRST_SAMPLE_ROW
for segment_label in label:
  # each entry is [segment of shape (SAMPLES_PER_IMAGE, n_electrodes), label]
  data_i = [x_array[index:index + SAMPLES_PER_IMAGE, :], segment_label]
  data.append(data_i)
  index += SAMPLES_PER_IMAGE

# object dtype keeps the (segment, label) pairs as a (n_segments, 2) array
data = np.array(data, dtype=object)

In [162]:
#data[0][0].shape

# Model

In [163]:
import torch
import torch.nn as nn

In [164]:
# a simple conv net

# geometry of one input segment: 125 time samples x 5 electrodes
input_height = 125
input_width = 5

# conv1 slides along the time axis only, conv2 along the electrode axis only
conv1_ksize = (15, 1)
conv2_ksize = (1, 5)

conv1_isize = 1
conv1_osize = 5
conv2_isize = conv1_osize
conv2_osize = 10

fc1_dropout_p = 0.5
fc2_dropout_p = 0.5

# Flattened size of the conv2 output. Derived instead of hard-coded so it stays
# in sync with the kernel sizes above; the formula assumes stride 1, no padding
# and no max-pooling. With the current settings this is 10 * 111 * 1 = 1110.
conv_out_height = input_height - (conv1_ksize[0] - 1) - (conv2_ksize[0] - 1)
conv_out_width = input_width - (conv1_ksize[1] - 1) - (conv2_ksize[1] - 1)
fc1_isize = conv2_osize * conv_out_height * conv_out_width
fc1_osize = 256
fc2_isize = fc1_osize
fc2_osize = 1

In [165]:
class ConvLayer(nn.Module):
    """Conv2d -> BatchNorm2d -> Tanh, optionally followed by max-pooling.

    Args:
        isize: number of input channels.
        osize: number of output channels.
        ksize: convolution kernel size.
        maxpool: kernel size for ``nn.MaxPool2d``; pooling is skipped when falsy.
    """

    def __init__(self, isize, osize, ksize, maxpool=None):
        super().__init__()

        conv = nn.Conv2d(isize, osize, ksize)
        norm = nn.BatchNorm2d(osize)
        self.layer = nn.Sequential(conv, norm, nn.Tanh())

        # pooling stage exists only when a kernel size was given
        self.maxpool = nn.MaxPool2d(maxpool) if maxpool else None

    def forward(self, x):
        """Apply the conv stack and, when configured, the pooling stage."""
        out = self.layer(x)
        if self.maxpool is None:
            return out
        return self.maxpool(out)

class LinearLayer(nn.Module):
    """Dropout -> Linear, with optional BatchNorm1d and Tanh afterwards.

    Args:
        isize: number of input features.
        osize: number of output features.
        dropout_p: dropout probability applied to the layer input.
        norm: when true, batch-normalise the linear output.
        activate: when true, apply Tanh as the last step.
    """

    def __init__(self, isize, osize, dropout_p, norm=True, activate=True):
        super().__init__()

        self.dropout = nn.Dropout(p=dropout_p)
        self.linear = nn.Linear(isize, osize)

        # optional stages are stored as None when disabled
        self.batch_norm = None
        if norm:
            self.batch_norm = nn.BatchNorm1d(osize)

        self.activate = None
        if activate:
            self.activate = nn.Tanh()

    def forward(self, x):
        """Run the input through dropout, the linear map and the enabled extras."""
        out = self.linear(self.dropout(x))
        for stage in (self.batch_norm, self.activate):
            if stage is not None:
                out = stage(out)
        return out

class ConvNet(nn.Module):
    """Two-stage convolutional feature extractor followed by a two-layer classifier.

    All layer sizes come from the module-level hyper-parameters
    (``conv*_isize/osize/ksize``, ``fc*_isize/osize/dropout_p``). The final
    Sigmoid makes the single output a value in (0, 1).
    """

    def __init__(self):
        super().__init__()

        # normalise the raw input channel before any convolution
        self.in_batch_norm = nn.BatchNorm2d(conv1_isize)

        first_conv = ConvLayer(conv1_isize, conv1_osize, conv1_ksize)
        second_conv = ConvLayer(conv2_isize, conv2_osize, conv2_ksize)
        self.feat_extractor = nn.Sequential(first_conv, second_conv)

        hidden = LinearLayer(fc1_isize, fc1_osize, fc1_dropout_p)
        # last linear layer: no batch norm / tanh, the Sigmoid below is the output non-linearity
        output = LinearLayer(fc2_isize, fc2_osize, fc2_dropout_p, norm=False, activate=False)
        self.fc_layer = nn.Sequential(hidden, output, nn.Sigmoid())

    def forward(self, x):
        """Return one sigmoid score per sample for a (batch, channel, height, width) input."""
        features = self.feat_extractor(self.in_batch_norm(x))

        # collapse everything except the batch axis before the classifier
        flat = features.view(features.size()[0], -1)

        return self.fc_layer(flat)

# Load data

In [177]:
####### main logic #######
# `data` holds one (x, y) pair per row: x is the EEG segment, y its 0/1 label
data_size = len(data)

# put the samples in a random order
shuffle_idx = np.random.permutation(data_size)
data = data[shuffle_idx]

# first 80% of the shuffled samples -> train, remaining 20% -> test
cutoff = (data_size * 80) // 100
train_data, test_data = data[:cutoff], data[cutoff:]

In [178]:
# Number of samples in the training split
len(train_data)

96

In [179]:
# report how many target (1) / non-target (0) samples each split contains
def _label_counts(split):
    """Return (size, number of label-1 rows, number of remaining rows) for a split."""
    size = len(split)
    true_count = np.sum([sample[1] for sample in split])
    return size, true_count, size - true_count

train_data_size, train_data_true_count, train_data_false_count = _label_counts(train_data)

print('train_data_size, train_data_true_count, train_data_false_count')
print(train_data_size, train_data_true_count, train_data_false_count)

test_data_size, test_data_true_count, test_data_false_count = _label_counts(test_data)

print('test_data_size, test_data_true_count, test_data_false_count')
print(test_data_size, test_data_true_count, test_data_false_count)

# DON'T NEED TO BALANCE
# assert train_data_false_count >= train_data_true_count

# train_data_dup_count = train_data_false_count - train_data_true_count
# train_data_true_idx = np.array([i for i, x in enumerate(train_data) if x[1] == 1])
# train_data_true_sample_idx = np.random.choice(train_data_true_idx, train_data_dup_count, replace=True)
# train_data_addon = train_data[train_data_true_sample_idx]

# # make sure that all the addon have true labels
# assert all([x[1] == 1 for x in train_data_addon])

# # stack the addon to the original trainning data and shuffle again
# train_data = np.concatenate((train_data, train_data_addon), axis=0)
# train_data_size = len(train_data)
# shuffle_idx = np.random.permutation(train_data_size)
# train_data = train_data[shuffle_idx]

train_data_size, train_data_true_count, train_data_false_count
96 17 79
test_data_size, test_data_true_count, test_data_false_count
24 5 19


# Train

In [180]:
# Number of samples per mini-batch; this value is also embedded in the saved model's file name
batch_size = 128

# for debug
# np.random.seed(0)

# divide data into minibatches
def minibatch(data, batch_size):
    """Yield consecutive slices of ``data`` holding at most ``batch_size`` items.

    The last slice may be shorter. Empty ``data`` yields nothing, so callers
    never receive an empty batch.

    Args:
        data: any sliceable sequence with a length (list, numpy array, ...).
        batch_size: positive number of items per slice.

    Raises:
        ValueError: if ``batch_size`` is not positive (a non-positive size
            would otherwise never advance through ``data``).
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]

# calculate acc
def cal_acc(pred, target):
    """Return the fraction of positions where ``pred`` equals ``target``.

    Both arguments are converted to numpy arrays first so the comparison is
    element-wise even for plain Python lists (comparing two lists with ``==``
    yields a single bool, which would make the result meaningless).

    Args:
        pred: sequence of predicted labels.
        target: sequence of true labels, same length as ``pred``.
    """
    pred = np.asarray(pred)
    target = np.asarray(target)
    assert len(pred) == len(target)
    acc = np.sum(pred == target) / len(pred)
    return acc

def cal_f(pred, target):
    """Return ``(f_score, precision, recall)`` for the positive class (label 1).

    Undefined ratios are reported as 0.0 instead of nan: precision when nothing
    was predicted positive, recall when there are no positive targets, and the
    F-score when precision + recall is zero. Inputs are converted to numpy
    arrays so plain lists are compared element-wise.

    Args:
        pred: sequence of predicted labels.
        target: sequence of true labels, same length as ``pred``.
    """
    pred = np.asarray(pred)
    target = np.asarray(target)
    assert len(pred) == len(target)

    pred_positive = pred == 1
    target_positive = target == 1

    # true positives: predicted 1 where the target is 1
    tp = int(np.sum(pred_positive & target_positive))
    n_pred_positive = int(np.sum(pred_positive))
    n_target_positive = int(np.sum(target_positive))

    precision = tp / n_pred_positive if n_pred_positive else 0.0
    recall = tp / n_target_positive if n_target_positive else 0.0

    denominator = precision + recall
    f_score = (2 * precision * recall) / denominator if denominator else 0.0
    return f_score, precision, recall

# train function
def _batch_to_tensors(batch):
    """Turn an object array of ``[segment, label]`` rows into ``(x, y)`` float tensors.

    ``x`` gets a singleton channel axis inserted after the batch axis, so
    equally-shaped 2-D segments become a 4-D ``(batch, 1, height, width)`` tensor.
    The shape is taken from the batch itself, not from any global batch size.
    """
    segments = np.stack(list(batch[:, 0]))
    x = torch.as_tensor(segments, dtype=torch.float32).unsqueeze(1)
    y = torch.as_tensor(np.asarray(batch[:, 1], dtype=np.float32))
    return x, y


def _binarize(pred):
    """Threshold model outputs at 0.5 and return a flat 0/1 integer numpy array."""
    scores = pred.detach().cpu().numpy().reshape(-1)
    return (scores >= 0.5).astype(int)


def train_batch(model, criterion, optimizer, batch):
    """Run one optimisation step on ``batch`` and return the 0/1 predictions.

    Args:
        model: network mapping the input tensor to one score per sample.
        criterion: loss called as ``criterion(flat_scores, labels)``.
        optimizer: optimiser stepping ``model``'s parameters.
        batch: object array whose column 0 holds the segments and column 1 the labels.
    """
    model.zero_grad()

    # forward pass
    x, y = _batch_to_tensors(batch)
    pred = model(x)

    # back propagation
    loss = criterion(pred.view(-1), y)
    loss.backward()
    optimizer.step()

    return _binarize(pred)


def val_batch(model, criterion, optimizer, batch):
    """Return the 0/1 predictions for ``batch`` without tracking gradients.

    ``criterion`` and ``optimizer`` are accepted only so the signature matches
    ``train_batch``; neither is used. The function does not switch the model
    between train and eval mode -- that is left to the caller.
    """
    with torch.no_grad():
        x, _ = _batch_to_tensors(batch)
        return _binarize(model(x))

In [181]:
epoch = 10

# init model
model = ConvNet()

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5 * 2, weight_decay=1e-2)

# train loop
# use k-fold validation
# NOTE(review): the same model and optimizer are carried over from fold to
# fold, so from the second fold on the validation fold contains samples the
# model has already been trained on. Re-create both per fold if independent
# fold estimates are wanted -- confirm the intended protocol.
k_fold = 10
fold_size = int(train_data_size // k_fold)
for i in range(k_fold):

    # split data into train/val
    val_data_curr_fold = train_data[i*fold_size:(i+1)*fold_size]
    train_data_curr_fold_head = train_data[:i*fold_size]
    train_data_curr_fold_tail = train_data[(i+1)*fold_size:]
    train_data_curr_fold = np.concatenate((train_data_curr_fold_head, train_data_curr_fold_tail))

    # epoch
    for curr_epoch in range(epoch):

        # train minibatch (dropout active, batch-norm statistics updated)
        model.train()
        train_pred = []
        train_data_curr_fold = train_data_curr_fold[np.random.permutation(len(train_data_curr_fold))]
        for b in minibatch(train_data_curr_fold, batch_size):
            train_batch_pred = train_batch(model, criterion, optimizer, b)
            train_pred.append(train_batch_pred)
        train_pred = np.concatenate(train_pred, axis=0)

        # Validate in eval mode: in train mode dropout would perturb the
        # predictions and the batch-norm running statistics would be updated
        # with validation data even under torch.no_grad().
        model.eval()
        val_pred = []
        for b in minibatch(val_data_curr_fold, batch_size):
            val_batch_pred = val_batch(model, criterion, optimizer, b)
            val_pred.append(val_batch_pred)
        val_pred = np.concatenate(val_pred, axis=0)

        # calculate acc
        train_target = train_data_curr_fold[:, 1].reshape(-1)
        train_acc = cal_acc(train_pred, train_target)
        val_target = val_data_curr_fold[:, 1].reshape(-1)
        val_acc = cal_acc(val_pred, val_target)

        # print stats
        print(f"fold: {i}, epoch: {curr_epoch}, train acc: {train_acc}, val acc: {val_acc}")

    # test acc (evaluated after every fold on the held-out test split)
    model.eval()
    test_pred = []
    for b in minibatch(test_data, batch_size):
        test_batch_pred = val_batch(model, criterion, optimizer, b)
        test_pred.append(test_batch_pred)
    test_pred = np.concatenate(test_pred, axis=0)
    test_target = test_data[:, 1].reshape(-1)
    test_acc = cal_acc(test_pred, test_target)
    test_f_score, test_percision, test_recall = cal_f(test_pred, test_target)
    print(f"fold: {i}, test acc: {test_acc}")
    print(f"fold: {i}, test percision: {test_percision}, test recall: {test_recall}, test f score: {test_f_score}")

    if i == k_fold - 1:
        # Save the model after the last fold: once to Drive, once locally for download
        model_file_name = f'model_fold_{i+1}_epoch_{epoch}_bs_{batch_size}.pth'
        model_save_path = f'/content/drive/My Drive/Colab Notebooks/cnn-p300/models/{model_file_name}'
        torch.save(model.state_dict(), model_save_path)

        torch.save(model.state_dict(), model_file_name)
        from google.colab import files
        files.download(model_file_name)

    #     break##

fold: 0, epoch: 0, train acc: 0.5402298850574713, val acc: 0.4444444444444444
fold: 0, epoch: 1, train acc: 0.5402298850574713, val acc: 0.5555555555555556
fold: 0, epoch: 2, train acc: 0.367816091954023, val acc: 0.2222222222222222
fold: 0, epoch: 3, train acc: 0.4942528735632184, val acc: 0.5555555555555556
fold: 0, epoch: 4, train acc: 0.42528735632183906, val acc: 0.3333333333333333
fold: 0, epoch: 5, train acc: 0.5517241379310345, val acc: 0.6666666666666666
fold: 0, epoch: 6, train acc: 0.45977011494252873, val acc: 0.6666666666666666
fold: 0, epoch: 7, train acc: 0.4482758620689655, val acc: 0.3333333333333333
fold: 0, epoch: 8, train acc: 0.45977011494252873, val acc: 0.6666666666666666
fold: 0, epoch: 9, train acc: 0.45977011494252873, val acc: 0.4444444444444444
fold: 0, test acc: 0.20833333333333334
fold: 0, test percision: 0.20833333333333334, test recall: 1.0, test f score: 0.3448275862068966
fold: 1, epoch: 0, train acc: 0.5287356321839081, val acc: 0.6666666666666666
fol

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>