In [19]:
import pickle
import re
import random
import numpy as np
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import DataLoader, TensorDataset

In [20]:
def read_EEG_embeddings_labels(path):
    """Load EEG embeddings and their labels from a pickle file.

    The file must contain two consecutively pickled objects: the embeddings
    first, the labels second.

    Args:
        path: Location of the pickle file.

    Returns:
        A ``(embeddings, labels)`` tuple, in the order stored in the file.
    """
    # NOTE: unpickling can execute arbitrary code; only open files you trust.
    with open(path, 'rb') as handle:
        embeddings = pickle.load(handle)
        labels = pickle.load(handle)
        return embeddings, labels

In [21]:
def encode_labels(y):
    """One-hot encode a sequence of class labels.

    The labels are mapped to integer ids by a ``LabelEncoder`` fitted on ``y``
    itself and then expanded with ``to_categorical``. Because the encoder is
    fitted on every call, the id assigned to a label (and the number of output
    columns) depends on the distinct labels present in ``y``.

    Args:
        y: Sequence of class labels.

    Returns:
        The categorical (one-hot) matrix produced by ``to_categorical``.
    """
    integer_ids = LabelEncoder().fit_transform(y)
    return to_categorical(integer_ids)

In [22]:
def get_sentences_EEG(labels, EEG_embeddings):
    """Group a flat word stream into sentences and slice the EEG data to match.

    Every ``"SOS"`` entry in ``labels`` acts as a sentence delimiter and is
    never part of a sentence. For each non-empty sentence, the next
    ``len(sentence)`` items of ``EEG_embeddings`` are taken as its EEG segment.
    A sentence that is still open when the labels run out is also emitted, so
    the final sentence is kept even without a trailing ``"SOS"``.

    Args:
        labels: Sequence of words, with ``"SOS"`` marking sentence starts.
        EEG_embeddings: Sliceable sequence of per-word EEG embeddings.
            NOTE(review): the running index only advances by the number of
            real words, so this assumes there are no embedding entries for
            the "SOS" markers - confirm against the code that wrote the file.

    Returns:
        ``(Sentences, EEG_Sentencs)``: a list of word lists and the list of
        matching EEG slices, in order of appearance.
    """
    Sentences = []
    EEG_Sentencs = []
    current_sentence = []
    EEG_index = 0

    # The trailing sentinel flushes the last sentence through the same code
    # path as every other one; it is a no-op when labels already end in "SOS".
    for word in list(labels) + ["SOS"]:
        if word != "SOS":
            current_sentence.append(word)
            continue

        # Consecutive markers (or a leading marker) enclose nothing: skip.
        if not current_sentence:
            continue

        sentence_length = len(current_sentence)
        Sentences.append(current_sentence)
        EEG_Sentencs.append(EEG_embeddings[EEG_index:EEG_index + sentence_length])
        EEG_index += sentence_length
        current_sentence = []

    return Sentences, EEG_Sentencs

In [23]:
def pad_sentences(EEG_embeddings, max_length, pad_shape=(105, 8)):
    """Right-pad every EEG sentence with zero arrays up to ``max_length`` words.

    Each sentence is copied into a new list before padding, so the caller's
    data is never modified and sentences given as NumPy arrays are accepted.
    Sentences that already have ``max_length`` or more words are returned
    unpadded and are NOT truncated.

    Args:
        EEG_embeddings: Sequence of sentences; each sentence is a sequence of
            per-word arrays.
        max_length: Target number of words per sentence.
        pad_shape: Shape of the zero array appended for every missing word.
            Defaults to ``(105, 8)``.

    Returns:
        A list with one new list of per-word arrays for every input sentence.
    """
    padded_EEG_sentences = []
    for sentence in EEG_embeddings:
        padded = list(sentence)
        missing = max_length - len(padded)
        if missing > 0:
            # A fresh array per slot, so padded positions never share memory.
            padded.extend(np.zeros(pad_shape) for _ in range(missing))
        padded_EEG_sentences.append(padded)
    return padded_EEG_sentences

In [24]:
def reshape_data(X):
    """Flatten every per-word array and stack everything into one NumPy array.

    Args:
        X: Sequence of sentences, each a sequence of NumPy arrays.

    Returns:
        ``np.array`` built from the nested lists of flattened (1-D) arrays.
        When every sentence has the same number of words and every array the
        same size, this is a 3-D array indexed by sentence, word, feature.
    """
    flattened_sentences = [
        [word_array.reshape(-1) for word_array in sentence]
        for sentence in X
    ]
    return np.array(flattened_sentences)

In [25]:
# Pickled EEG/text pairs for the training and test splits.
# NOTE: these are absolute, machine-specific locations; adjust them when
# running the notebook anywhere else.
train_path, test_path = (
    r"C:\Users\gxb18167\PycharmProjects\EEG-To-Text\SIGIR_Development\EEG-GAN\EEG_Text_Pairs_Sentence.pkl",
    r"C:\Users\gxb18167\PycharmProjects\EEG-To-Text\SIGIR_Development\EEG-GAN\Test_EEG_Text_Pairs_Sentence.pkl",
)




In [26]:
# Training split: load the word-level stream, then regroup it sentence by sentence.
(EEG_word_level_embeddings,
 EEG_word_level_labels) = read_EEG_embeddings_labels(train_path)
(EEG_word_level_sentences,
 EEG_sentence_embeddings) = get_sentences_EEG(EEG_word_level_labels, EEG_word_level_embeddings)

# Test split: same two steps.
(Test_EEG_word_level_embeddings,
 Test_EEG_word_level_labels) = read_EEG_embeddings_labels(test_path)
(Test_EEG_word_level_sentences,
 Test_EEG_sentence_embeddings) = get_sentences_EEG(Test_EEG_word_level_labels, Test_EEG_word_level_embeddings)



In [27]:
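# TODO: load the real sentence labels here; until a label file exists, random
# placeholder labels are generated instead.
#label_path = "insert here"
#train_labels, test_labels = read_EEG_embeddings_labels(label_path)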

In [28]:
# Placeholder binary labels, one per sentence, used until real labels exist.
# A dedicated seeded generator makes them identical on every run without
# touching the global `random` state.
_fake_label_rng = random.Random(0)
fake_train_labels = [_fake_label_rng.choice([0, 1]) for _ in range(len(EEG_word_level_sentences))]
fake_test_labels = [_fake_label_rng.choice([0, 1]) for _ in range(len(Test_EEG_word_level_sentences))]

In [29]:
# The padding length has to cover BOTH splits: pad_sentences() only pads and
# never truncates, so a test sentence longer than every training sentence
# would leave X_test ragged and break the conversion to a single array.
max_length = max(
    len(sentence)
    for sentence in (*EEG_word_level_sentences, *Test_EEG_word_level_sentences)
)

In [30]:
# Features: pad every sentence to `max_length` words, then flatten each word's
# EEG array and stack the sentences into one array per split.
X_train = reshape_data(pad_sentences(EEG_sentence_embeddings, max_length))
X_test = reshape_data(pad_sentences(Test_EEG_sentence_embeddings, max_length))

# Targets: one-hot encode the placeholder labels of each split.
train_labels, test_labels = (
    encode_labels(fake_train_labels),
    encode_labels(fake_test_labels),
)

In [31]:
# Copy the training features and one-hot labels into float32 tensors.
x_train_tensor, y_train_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, train_labels)
)


In [36]:
# Bundle the feature and label tensors into a TensorDataset so that indexing
# it yields matching (x, y) pairs for the DataLoader.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)

In [36]:
# Number of sentences per mini-batch (adjust according to your preference)
batch_size = 32

# Training loader; shuffle=True reorders the samples at the start of every epoch
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [37]:
#classifier
import torch
import torch.nn as nn
import torch.optim as optim

In [38]:
# Device configuration: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [39]:
# Define the BLSTM classifier model
class BLSTMClassifier(nn.Module):
    """Bidirectional LSTM followed by a linear layer over a sequence summary.

    The summary is the concatenation of the last layer's final hidden state
    from each direction, so both halves have processed the whole sequence.
    (Slicing the LSTM output at the last time step instead would give the
    backward direction's state after it has seen only one input.)

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits.
        dropout: Dropout probability applied to the sequence summary before
            the linear layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes, dropout=0.2):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, num_classes)  # *2 for bidirectional

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Input of shape (batch, seq_len, input_size); the LSTM is
                built with ``batch_first=True``.
        """
        # nn.LSTM starts from zero hidden/cell states when none are passed,
        # so no explicit h0/c0 tensors are needed.
        _, (h_n, _) = self.lstm(x)

        # h_n is (num_layers * 2, batch, hidden_size); its last two entries
        # are the top layer's final forward and backward states.
        summary = torch.cat((h_n[-2], h_n[-1]), dim=1)

        return self.fc(self.dropout(summary))

In [40]:
# Model hyperparameters
input_size = 105 * 8  # length of one flattened (105, 8) per-word EEG array = 840
hidden_size = 64      # hidden units per LSTM direction
num_layers = 2        # stacked LSTM layers
num_classes = 2       # output logits

In [41]:

# Build the classifier and move its parameters to the selected device
model = BLSTMClassifier(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)

# Cross-entropy loss on the logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [42]:
# Number of full passes over the training loader
num_epochs = 10

In [43]:
# Train for `num_epochs` passes and report the mean batch loss of each epoch.
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs = model(batch_x)

        # The labels arrive as float one-hot rows of shape (batch, classes).
        # argmax over the class axis turns them into the Long class indices of
        # shape (batch,) that CrossEntropyLoss expects. Unlike squeeze(), it
        # keeps the batch axis when the final batch holds a single sample.
        targets = batch_y.argmax(dim=1)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
