In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.nn.functional as F
import os
import scipy
import random
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader as DL
from torch.utils.data import TensorDataset as TData
from tqdm import tqdm
import re
from sklearn.model_selection import train_test_split as tts
import pickle

import zipfile
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [10]:
!unzip "LHNT_EEG.zip"

Archive:  LHNT_EEG.zip
replace LHNT EEG/Nandini_Senthilkumar_Session6/right_2.pkl? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [11]:
def getAllPickles(directory="LHNT EEG"):
    folders = [drctry for drctry in os.listdir(directory) if os.path.isdir(os.path.join(directory, drctry))]
    files = []
    for folder in folders:
        folder_files = os.listdir(os.path.join(directory, folder))
        for file in folder_files:
            if ".pkl" in file:
                files.append(os.path.join(directory, folder, file))
    return files

def npFromPickle(pickle_files):
    np_data = []
    labels = [] # 0 is left, 1 is right
    for file in pickle_files:
        with open(file, "rb") as f:
            data1 = pickle.load(f)
            np_data.append(data1[0])
        if 'right' in file.split('/')[-1]:
            labels.append(1)
        else:
            labels.append(0)
    return np_data, labels

np_data, labels = npFromPickle(getAllPickles())
print(len(np_data), len(labels))

380 380


In [12]:
# applying a bandpass filter
def bandpass_filter(signal, crit_freq = [1, 40], sampling_freq = 125, plot = False, channel = 0):
  order = 4

  b, a = scipy.signal.butter(order, crit_freq, btype = 'bandpass', fs = sampling_freq)
  processed_signal = scipy.signal.filtfilt(b, a, signal, 1)

  if plot == True:
    plt.figure()
    plt.xlabel('Time')
    plt.ylabel(f'Normalized amplitude of channel {channel}')
    plt.title(f'{crit_freq[0]}-{crit_freq[1]}Hz bandpass filter')
    signal_min = np.full((signal.shape[1], signal.shape[0]), np.min(signal, 1)).transpose()
    signal_max = np.full((signal.shape[1], signal.shape[0]), np.max(signal, 1)).transpose()
    normed_signal = (signal - signal_min) / (signal_max - signal_min)
    filtered_min = np.full((processed_signal.shape[1], processed_signal.shape[0]), np.min(processed_signal, 1)).transpose()
    filtered_max = np.full((processed_signal.shape[1], processed_signal.shape[0]), np.max(processed_signal, 1)).transpose()
    normed_filt = (processed_signal - filtered_min) / (filtered_max - filtered_min)
    plt.plot(np.arange(normed_signal[channel].size), normed_signal[channel], label = 'Input')
    plt.plot(np.arange(normed_filt[channel].size), normed_filt[channel], label = 'Transformed')
    plt.legend()

  return processed_signal


# function to segment eeg data based on sampling freq(Hz), window_size(s), and window_shift(s)
def segmentation(signal, sampling_freq=125, window_size=1, window_shift=0.016):
  w_size = int(sampling_freq * window_size)
  w_shift = int(sampling_freq * window_shift)
  segments = []
  i = 0
  while i + w_size <= signal.shape[1]:
    segments.append(signal[:, i: i + w_size])
    i += w_shift
  return segments

def channel_rearrangment(sig, channel_order):
    channel_order = [channel - 1 for channel in channel_order]
    reindexed = np.zeros_like(sig)
    for i, ind in enumerate(channel_order):
        reindexed[i] = sig[ind]
    return reindexed

ordered_channels = [1, 9, 11, 3, 2, 12, 10, 4, 13, 5, 15, 7, 14, 16, 6, 8]

In [13]:
train_x, test_x, train_y, test_y = tts(np_data, labels, test_size = 0.25)
val_x, test_x = test_x[:len(test_x)//2], test_x[len(test_x)//2:]
val_y, test_y = test_y[:len(test_y)//2], test_y[len(test_y)//2:]

In [14]:
# applying all preprocessing steps to create train and test data
train_eeg = []
train_labels = []
valid_eeg = []
valid_labels = []
test_eeg = []
test_labels = []
for sig, label in zip(train_x, train_y):
  if sig.shape[1] == 0: # excluding empty sample elements
    #print(name)
    continue
  reindexed_signal = channel_rearrangment(sig, ordered_channels)
  filtered_sig = bandpass_filter(reindexed_signal, [5, 40], 125) # bandpass filter
  normed_sig = (filtered_sig - np.mean(filtered_sig, 1, keepdims=True)) / np.std(filtered_sig, 1, keepdims=True) # standard scaling
  if np.isnan(normed_sig).any(): # excluding sample elements with nans
    print("nan")
    continue
  signals = segmentation(normed_sig, 125, window_size = 1.5, window_shift = 0.0175) # segmentation
  labels = [label] * len(signals)
  train_eeg.extend(signals)
  train_labels.extend(labels)

for sig, label in zip(val_x, val_y):
  if sig.shape[1] == 0: # excluding empty sample elements
    #print(name)
    continue
  reindexed_signal = channel_rearrangment(sig, ordered_channels)
  filtered_sig = bandpass_filter(reindexed_signal, [5, 40], 125) # bandpass filter
  normed_sig = (filtered_sig - np.mean(filtered_sig, 1, keepdims=True)) / np.std(filtered_sig, 1, keepdims=True) # standard scaling
  if np.isnan(normed_sig).any(): # excluding sample elements with nans
    print("nan")
    continue
  signals = segmentation(normed_sig, 125, window_size = 1.5, window_shift = 0.0175) # segmentation
  labels = [label] * len(signals)
  valid_eeg.extend(signals)
  valid_labels.extend(labels)

for sig, label in zip(test_x, test_y):
  if sig.shape[1] == 0: # excluding empty sample elements
    #print(name)
    continue
  reindexed_signal = channel_rearrangment(sig, ordered_channels)
  filtered_sig = bandpass_filter(reindexed_signal, [5, 40], 125) # bandpass filter
  normed_sig = (filtered_sig - np.mean(filtered_sig, 1, keepdims=True)) / np.std(filtered_sig, 1, keepdims=True) # standard scaling
  if np.isnan(normed_sig).any(): # excluding sample elements with nans
    print("nan")
    continue
  signals = segmentation(normed_sig, 125, window_size = 1.5, window_shift = 0.0175) # segmentation, changed to 125
  labels = [label] * len(signals)
  test_eeg.extend(signals)
  test_labels.extend(labels)

In [15]:
train_eeg_tensor = torch.zeros((len(train_eeg), train_eeg[0].shape[0], train_eeg[0].shape[1])) # untransposed dimensions 1 and 2
valid_eeg_tensor = torch.zeros((len(valid_eeg), valid_eeg[0].shape[0], valid_eeg[0].shape[1]))
test_eeg_tensor = torch.zeros((len(test_eeg), test_eeg[0].shape[0], test_eeg[0].shape[1]))
for i in range(len(train_eeg)):
  tens = torch.from_numpy(train_eeg[i].copy()) # no longer transposing before conversion to tensor
  train_eeg_tensor[i] = tens
for i in range(len(valid_eeg)):
  tens = torch.from_numpy(valid_eeg[i].copy())
  valid_eeg_tensor[i] = tens
for i in range(len(test_eeg)):
  tens = torch.from_numpy(test_eeg[i].copy())
  test_eeg_tensor[i] = tens
train_label_tensor = torch.zeros(len(train_labels), 2)
valid_label_tensor = torch.zeros(len(valid_labels), 2)
test_label_tensor = torch.zeros(len(test_labels), 2)
for i, val in enumerate(train_labels):
  train_label_tensor[i][val] = 1
for i, val in enumerate(valid_labels):
  valid_label_tensor[i][val] = 1
for i, val in enumerate(test_labels):
  test_label_tensor[i][val] = 1

train_ds = TData(train_eeg_tensor, train_label_tensor)
valid_ds = TData(valid_eeg_tensor, valid_label_tensor)
test_ds = TData(test_eeg_tensor, test_label_tensor)
train_dl = DL(train_ds, batch_size=64, shuffle= True, drop_last = True)
valid_dl = DL(valid_ds, batch_size=64, shuffle= True, drop_last = True)
test_dl = DL(test_ds, batch_size=64, shuffle = True, drop_last = True)

In [16]:
print(len(train_dl), len(valid_dl), len(test_dl))

1754 280 286


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader as DL
import torch.nn.functional as F
import time
# Assume `train_dl`, `valid_dl`, and `test_dl` are already created from preprocessing.
# Step 1: Define the Residual CNN Model
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.identity_conv = (
            nn.Conv1d(in_channels, out_channels, kernel_size=1)
            if in_channels != out_channels
            else None
        )
    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.identity_conv is not None:
            identity = self.identity_conv(identity)
        out += identity
        return F.relu(out)
class FCResCNN(nn.Module):
    def __init__(self, num_channels, sequence_length, num_classes):
        super(FCResCNN, self).__init__()
        self.down_sample = nn.Sequential(
            nn.Conv1d(num_channels, 32, kernel_size=1, stride=1),
            nn.BatchNorm1d(32),
            nn.Dropout(0.5),
        )
        self.res_block1 = ResidualBlock(32, 32)
        self.res_block2 = ResidualBlock(32, 64)
        self.res_block3 = ResidualBlock(64, 128)
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * sequence_length, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        )
    def forward(self, x):
        x = self.down_sample(x)
        x = self.res_block1(x)
        x = self.res_block2(x)
        x = self.res_block3(x)
        x = self.classifier(x)
        return x
# Step 2: Model Configuration
print("Extracting model configuration...")
example_batch = next(iter(train_dl))
features, labels = example_batch
num_channels = features.shape[1]
sequence_length = features.shape[2]
num_classes = labels.shape[1]
print(f"Number of channels: {num_channels}")
print(f"Sequence length: {sequence_length}")
print(f"Number of classes: {num_classes}")
# Initialize the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
model = FCResCNN(num_channels, sequence_length, num_classes).to(device)
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
# Step 3: Training Function
def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=20):
    print("Starting training...")
    for epoch in range(num_epochs):
        start_time = time.time()
        model.train()
        train_loss = 0.0
        print(f"Epoch {epoch + 1}/{num_epochs}:")
        for batch_idx, (features, labels) in enumerate(train_loader):
            features, labels = features.to(device), labels.argmax(dim=1).to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            if batch_idx % 10 == 0:  # Print every 10 batches
                print(f"  Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}")
        # Validation phase
        model.eval()
        valid_loss = 0.0
        with torch.no_grad():
            for features, labels in valid_loader:
                features, labels = features.to(device), labels.argmax(dim=1).to(device)
                outputs = model(features)
                loss = criterion(outputs, labels)
                valid_loss += loss.item()
        epoch_time = time.time() - start_time
        print(
            f"Epoch {epoch + 1} completed in {epoch_time:.2f} seconds. "
            f"Train Loss: {train_loss / len(train_loader):.4f}, "
            f"Validation Loss: {valid_loss / len(valid_loader):.4f}"
        )
# Step 4: Evaluation Function
def evaluate_model(model, test_loader):
    print("Evaluating model on test data...")
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (features, labels) in enumerate(test_loader):
            features, labels = features.to(device), labels.argmax(dim=1).to(device)
            outputs = model(features)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            if batch_idx % 10 == 0:  # Print progress every 10 batches
                print(f"  Batch {batch_idx}/{len(test_loader)}, Running Accuracy: {100 * correct / total:.2f}%")
    accuracy = 100 * correct / total
    print(f"Final Test Accuracy: {accuracy:.2f}%")
# Step 5: Run Training and Evaluation
train_model(model, train_dl, valid_dl, criterion, optimizer, num_epochs=3)
evaluate_model(model, test_dl)

Extracting model configuration...
Number of channels: 16
Sequence length: 187
Number of classes: 2
Using device: cuda
Starting training...
Epoch 1/3:
  Batch 0/1754, Loss: 0.6916
  Batch 10/1754, Loss: 1.0381
  Batch 20/1754, Loss: 0.6659
  Batch 30/1754, Loss: 0.6946
  Batch 40/1754, Loss: 0.6955
  Batch 50/1754, Loss: 0.7000
  Batch 60/1754, Loss: 0.6835
  Batch 70/1754, Loss: 0.6964
  Batch 80/1754, Loss: 0.6926
  Batch 90/1754, Loss: 0.6999
  Batch 100/1754, Loss: 0.6934
  Batch 110/1754, Loss: 0.6931
  Batch 120/1754, Loss: 0.6940
  Batch 130/1754, Loss: 0.6988
  Batch 140/1754, Loss: 0.6846
  Batch 150/1754, Loss: 0.6840
  Batch 160/1754, Loss: 0.6899
  Batch 170/1754, Loss: 0.6920
  Batch 180/1754, Loss: 0.6821
  Batch 190/1754, Loss: 0.6863
  Batch 200/1754, Loss: 0.6902
  Batch 210/1754, Loss: 0.6933
  Batch 220/1754, Loss: 0.6855
  Batch 230/1754, Loss: 0.6914
  Batch 240/1754, Loss: 0.6877
  Batch 250/1754, Loss: 0.6931
  Batch 260/1754, Loss: 0.6859
  Batch 270/1754, Loss: 

In [18]:
from collections import Counter
print("training" , Counter(train_labels))
print("validation" , Counter(valid_labels))
print("testing" , Counter(test_labels))

training Counter({1: 56235, 0: 56074})
validation Counter({0: 11569, 1: 6394})
testing Counter({1: 11661, 0: 6647})
