In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import os
import scipy
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset as TData
from sklearn.model_selection import train_test_split as tts
import pickle
import zipfile
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder

In [2]:
!unzip "LHNT_EEG.zip"

Archive:  LHNT_EEG.zip
   creating: LHNT EEG/
   creating: LHNT EEG/Nandini_Senthilkumar_Session6/
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/right_2.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/right_20.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/left_9.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/left_1.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/right_16.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/left_7.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/left_11.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/right_14.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/right_10.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/right_4.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/left_19.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/left_3.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_Session6/right_8.pkl  
  inflating: LHNT EEG/Nandini_Senthilkumar_

In [11]:
def getAllPickles(directory="LHNT EEG"):
    # List all subfolders and collect paths to all .pkl files.
    folders = [drctry for drctry in os.listdir(directory) if os.path.isdir(os.path.join(directory, drctry))]
    files = []
    for folder in folders:
        folder_files = os.listdir(os.path.join(directory, folder))
        for file in folder_files:
            if ".pkl" in file:
                files.append(os.path.join(directory, folder, file))
    return files

def npFromPickle(pickle_files):
    # Load each pickle file and assign a label:
    # 1 if filename contains "right", else 0.
    np_data = []
    labels = []  # 0 is left, 1 is right
    for file in pickle_files:
        with open(file, "rb") as f:
            data1 = pickle.load(f)
            np_data.append(data1[0])
        if 'right' in file.split('/')[-1]:
            labels.append(1)
        else:
            labels.append(0)
    return np_data, labels

np_data, labels = npFromPickle(getAllPickles())
print("Total samples:", len(np_data), "Total labels:", len(labels))

Total samples: 380 Total labels: 380


In [12]:
def bandpass_filter(signal, crit_freq=[1, 40], sampling_freq=125, plot=False, channel=0):
    # Applies a Butterworth bandpass filter.
    order = 4
    b, a = scipy.signal.butter(order, crit_freq, btype='bandpass', fs=sampling_freq)
    processed_signal = scipy.signal.filtfilt(b, a, signal, 1)
    if plot:
        plt.figure()
        plt.xlabel('Time')
        plt.ylabel(f'Normalized amplitude of channel {channel}')
        plt.title(f'{crit_freq[0]}-{crit_freq[1]}Hz bandpass filter')
        signal_min = np.full((signal.shape[1], signal.shape[0]), np.min(signal, 1)).transpose()
        signal_max = np.full((signal.shape[1], signal.shape[0]), np.max(signal, 1)).transpose()
        normed_signal = (signal - signal_min) / (signal_max - signal_min)
        filtered_min = np.full((processed_signal.shape[1], processed_signal.shape[0]), np.min(processed_signal, 1)).transpose()
        filtered_max = np.full((processed_signal.shape[1], processed_signal.shape[0]), np.max(processed_signal, 1)).transpose()
        normed_filt = (processed_signal - filtered_min) / (filtered_max - filtered_min)
        plt.plot(np.arange(normed_signal[channel].size), normed_signal[channel], label='Input')
        plt.plot(np.arange(normed_filt[channel].size), normed_filt[channel], label='Transformed')
        plt.legend()
    return processed_signal

def segmentation(signal, sampling_freq=125, window_size=1, window_shift=0.016):
    # Segments the signal into overlapping windows.
    w_size = int(sampling_freq * window_size)
    w_shift = int(sampling_freq * window_shift)
    segments = []
    i = 0
    while i + w_size <= signal.shape[1]:
        segments.append(signal[:, i: i + w_size])
        i += w_shift
    return segments

def channel_rearrangment(sig, channel_order):
    # Rearranges channels according to the provided order.
    # The channel_order is given in 1-indexed format.
    channel_order = [channel - 1 for channel in channel_order]
    reindexed = np.zeros_like(sig)
    for i, ind in enumerate(channel_order):
        reindexed[i] = sig[ind]
    return reindexed

# Define desired channel order.
ordered_channels = [1, 9, 11, 3, 2, 12, 10, 4, 13, 5, 15, 7, 14, 16, 6, 8]

In [13]:
train_x, test_x, train_y, test_y = tts(np_data, labels, test_size = 0.25)
val_x, test_x = test_x[:len(test_x)//2], test_x[len(test_x)//2:]
val_y, test_y = test_y[:len(test_y)//2], test_y[len(test_y)//2:]

In [14]:
train_eeg = []
train_labels = []
valid_eeg = []
valid_labels = []
test_eeg = []
test_labels = []

for sig, label in zip(train_x, train_y):
    if sig.shape[1] == 0:
        continue
    reindexed_signal = channel_rearrangment(sig, ordered_channels)
    filtered_sig = bandpass_filter(reindexed_signal, [5, 40], 125)
    normed_sig = (filtered_sig - np.mean(filtered_sig, 1, keepdims=True)) / np.std(filtered_sig, 1, keepdims=True)
    if np.isnan(normed_sig).any():
        continue
    signals = segmentation(normed_sig, 125, window_size=1.5, window_shift=0.0175)
    train_eeg.extend(signals)
    train_labels.extend([label] * len(signals))

for sig, label in zip(val_x, val_y):
    if sig.shape[1] == 0:
        continue
    reindexed_signal = channel_rearrangment(sig, ordered_channels)
    filtered_sig = bandpass_filter(reindexed_signal, [5, 40], 125)
    normed_sig = (filtered_sig - np.mean(filtered_sig, 1, keepdims=True)) / np.std(filtered_sig, 1, keepdims=True)
    if np.isnan(normed_sig).any():
        continue
    signals = segmentation(normed_sig, 125, window_size=1.5, window_shift=0.0175)
    valid_eeg.extend(signals)
    valid_labels.extend([label] * len(signals))

for sig, label in zip(test_x, test_y):
    if sig.shape[1] == 0:
        continue
    reindexed_signal = channel_rearrangment(sig, ordered_channels)
    filtered_sig = bandpass_filter(reindexed_signal, [5, 40], 125)
    normed_sig = (filtered_sig - np.mean(filtered_sig, 1, keepdims=True)) / np.std(filtered_sig, 1, keepdims=True)
    if np.isnan(normed_sig).any():
        continue
    signals = segmentation(normed_sig, 125, window_size=1.5, window_shift=0.0175)
    test_eeg.extend(signals)
    test_labels.extend([label] * len(signals))

In [15]:
columns_to_remove = [1, 2, 7, 8]
train_eeg = [np.delete(arr, columns_to_remove, axis=1) for arr in train_eeg]
valid_eeg = [np.delete(arr, columns_to_remove, axis=1) for arr in valid_eeg]
test_eeg  = [np.delete(arr, columns_to_remove, axis=1) for arr in test_eeg]

# -----------------------------
# Convert Data to PyTorch Tensors and Create Datasets
# -----------------------------
train_eeg_tensor = torch.zeros((len(train_eeg), train_eeg[0].shape[0], train_eeg[0].shape[1]))
valid_eeg_tensor = torch.zeros((len(valid_eeg), valid_eeg[0].shape[0], valid_eeg[0].shape[1]))
test_eeg_tensor  = torch.zeros((len(test_eeg),  test_eeg[0].shape[0],  test_eeg[0].shape[1]))

for i in range(len(train_eeg)):
    train_eeg_tensor[i] = torch.from_numpy(train_eeg[i].copy())
for i in range(len(valid_eeg)):
    valid_eeg_tensor[i] = torch.from_numpy(valid_eeg[i].copy())
for i in range(len(test_eeg)):
    test_eeg_tensor[i] = torch.from_numpy(test_eeg[i].copy())

# Create one-hot encoded labels (assuming 2 classes: left and right)
num_classes = 2
train_label_tensor = torch.zeros(len(train_labels), num_classes)
valid_label_tensor = torch.zeros(len(valid_labels), num_classes)
test_label_tensor  = torch.zeros(len(test_labels), num_classes)
for i, val in enumerate(train_labels):
    train_label_tensor[i][val] = 1
for i, val in enumerate(valid_labels):
    valid_label_tensor[i][val] = 1
for i, val in enumerate(test_labels):
    test_label_tensor[i][val] = 1

# Create TensorDatasets.
train_ds = TData(train_eeg_tensor, train_label_tensor)
valid_ds = TData(valid_eeg_tensor, valid_label_tensor)
test_ds  = TData(test_eeg_tensor,  test_label_tensor)

# Create DataLoaders.
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, drop_last=True)
valid_dl = DataLoader(valid_ds, batch_size=64, shuffle=False, drop_last=True)
test_dl  = DataLoader(test_ds,  batch_size=64, shuffle=False, drop_last=True)
print("Number of batches - Train:", len(train_dl), "Valid:", len(valid_dl), "Test:", len(test_dl))

Number of batches - Train: 1754 Valid: 267 Test: 299


In [16]:
print("Number of batches - Train:", len(train_dl), "Valid:", len(valid_dl), "Test:", len(test_dl))

Number of batches - Train: 1754 Valid: 267 Test: 299


In [17]:
class FixedGammaRBFLayer(nn.Module):
    def __init__(self, in_features, out_features, gamma=0.1):
        super(FixedGammaRBFLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.gamma = gamma
        self.centers = nn.Parameter(torch.Tensor(out_features, in_features))
        self.weights = nn.Parameter(torch.ones(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.centers)
        nn.init.constant_(self.weights, 1.0)

    def forward(self, x):
        # x: (batch, in_features)
        x_expanded = x.unsqueeze(1)                 # (batch, 1, in_features)
        centers_expanded = self.centers.unsqueeze(0)  # (1, out_features, in_features)
        diff = x_expanded - centers_expanded
        dist_sq = torch.sum(diff ** 2, dim=2)         # (batch, out_features)
        rbf_out = torch.exp(-self.gamma * dist_sq)    # RBF activation.
        rbf_out = rbf_out * self.weights              # Scale by trainable weights.
        return rbf_out

class RBFNetwork(nn.Module):
    def __init__(self, input_dim, num_rbf_units, num_classes,
                 gamma=0.1, hidden_dim=512, dropout_prob=0.5):
        super(RBFNetwork, self).__init__()
        # RBF layer: transforms flattened input into RBF activations.
        self.rbf = FixedGammaRBFLayer(input_dim, num_rbf_units, gamma=gamma)
        self.dropout = nn.Dropout(dropout_prob)
        # Classifier: two-layer FC network with ReLU and dropout.
        self.fc = nn.Sequential(
            nn.Linear(num_rbf_units, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout_prob),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        # Flatten input: (batch, channels, sequence_length) -> (batch, input_dim)
        x = x.view(x.size(0), -1)
        x_rbf = self.rbf(x)
        out = self.fc(x_rbf)
        return out

def initialize_rbf_centers(model, data_loader, num_samples=1000):
    # Use KMeans clustering on a subset of training data to initialize RBF centers.
    model.eval()
    all_features = []
    total_samples = 0
    for features, _ in data_loader:
        features = features.to(device)
        features = features.view(features.size(0), -1)
        with torch.no_grad():
            all_features.append(features.cpu())
        total_samples += features.size(0)
        if total_samples >= num_samples:
            break
    all_features = torch.cat(all_features, dim=0)
    kmeans = KMeans(n_clusters=model.rbf.out_features, random_state=0)
    kmeans.fit(all_features.numpy())
    centers = torch.tensor(kmeans.cluster_centers_, dtype=torch.float32, device=device)
    model.rbf.centers.data.copy_(centers)
    print("RBF centers initialized using KMeans on input features.")

def train_model_full(model, train_loader, criterion, optimizer, num_epochs=20):
    print("Training on the full dataset...")
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for features, labels in train_loader:
            features = features.to(device)
            labels = labels.argmax(dim=1).to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        avg_loss = train_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_loss:.4f}")

def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in data_loader:
            features = features.to(device)
            labels = labels.argmax(dim=1).to(device)
            outputs = model(features)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy

# -----------------------------
# Train on the Full Dataset (No Cross Validation)
# -----------------------------
# Create DataLoaders for the full training, validation, and test sets.
full_train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=64, shuffle=False)
test_loader  = DataLoader(test_ds,  batch_size=64, shuffle=False)

# Determine the input dimension and number of classes.
sample_feature, sample_label = train_ds[0]
input_dim = sample_feature.numel()       # Flattened input dimension.
num_classes = sample_label.shape[0]        # Number of classes.

# Use GPU if available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize the RBF network.
model_full = RBFNetwork(input_dim=input_dim,
                        num_rbf_units=256,
                        num_classes=num_classes,
                        gamma=0.1,
                        hidden_dim=512,
                        dropout_prob=0.5).to(device)

# Optionally initialize the RBF centers using KMeans.
initialize_rbf_centers(model_full, full_train_loader, num_samples=500)

# Set up optimizer and loss function.
optimizer_full = optim.Adam(model_full.parameters(), lr=0.001, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss()

# Train the model on the full training set.
train_model_full(model_full, full_train_loader, criterion, optimizer_full, num_epochs=20)

# Evaluate the trained model on the validation set.
print("\nValidation Set Evaluation:")
evaluate_model(model_full, valid_loader)

# Evaluate the trained model on the test set.
print("\nTest Set Evaluation:")
evaluate_model(model_full, test_loader)


Using device: cuda
RBF centers initialized using KMeans on input features.
Training on the full dataset...
Epoch 1/20 | Train Loss: 0.6931
Epoch 2/20 | Train Loss: 0.6930
Epoch 3/20 | Train Loss: 0.6929
Epoch 4/20 | Train Loss: 0.6929
Epoch 5/20 | Train Loss: 0.6930
Epoch 6/20 | Train Loss: 0.6929
Epoch 7/20 | Train Loss: 0.6930
Epoch 8/20 | Train Loss: 0.6929
Epoch 9/20 | Train Loss: 0.6929
Epoch 10/20 | Train Loss: 0.6929
Epoch 11/20 | Train Loss: 0.6930
Epoch 12/20 | Train Loss: 0.6929
Epoch 13/20 | Train Loss: 0.6929
Epoch 14/20 | Train Loss: 0.6929
Epoch 15/20 | Train Loss: 0.6929
Epoch 16/20 | Train Loss: 0.6929
Epoch 17/20 | Train Loss: 0.6929
Epoch 18/20 | Train Loss: 0.6929
Epoch 19/20 | Train Loss: 0.6929
Epoch 20/20 | Train Loss: 0.6929

Validation Set Evaluation:
Accuracy: 57.07%

Test Set Evaluation:
Accuracy: 36.45%


36.45083932853717