In [9]:
import os
import pickle
import re
import random

import numpy as np
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import DataLoader, TensorDataset

In [10]:
def read_EEG_embeddings_labels(path):
    """Load the two objects pickled back-to-back in ``path``.

    The file is read with two consecutive ``pickle.load`` calls: the first
    object is returned as the embeddings and the second as the labels.

    NOTE: ``pickle.load`` can execute arbitrary code while deserialising, so
    only use this on files from a trusted source.

    Args:
        path: Location of the pickle file; opened in binary read mode.

    Returns:
        An ``(embeddings, labels)`` tuple, in the order the objects were stored.
    """
    with open(path, 'rb') as handle:
        embeddings = pickle.load(handle)
        labels = pickle.load(handle)
        return embeddings, labels

In [11]:
def encode_labels(y, label_encoder=None):
    """One-hot encode a sequence of class labels.

    Labels are first mapped to integer codes with a ``LabelEncoder`` and then
    expanded to a one-hot matrix with ``to_categorical``.

    By default a fresh encoder is fitted on ``y`` alone. Encoding several
    splits (e.g. train and test) that way gives each split its own mapping, so
    a split that is missing a class ends up with different codes and a
    different number of columns. Pass the same already-fitted ``label_encoder``
    for every split to keep the mapping and the one-hot width consistent.

    Args:
        y: Sequence of labels to encode.
        label_encoder: Optional already-fitted ``LabelEncoder``. When given,
            it is used as-is (``transform`` only, never re-fitted) and the
            one-hot width is set to its number of known classes. When
            ``None``, a new encoder is fitted on ``y``.

    Returns:
        The one-hot matrix produced by ``to_categorical``, one row per label.
    """
    if label_encoder is None:
        # Default path: learn the class set from ``y`` itself.
        encoded_labels = LabelEncoder().fit_transform(y)
        return to_categorical(encoded_labels)

    # Shared-encoder path: reuse the caller's mapping and pin the width so a
    # split that lacks the highest class still gets every column.
    encoded_labels = label_encoder.transform(y)
    return to_categorical(encoded_labels, num_classes=len(label_encoder.classes_))

In [12]:
def get_sentences_EEG(labels, EEG_embeddings):
    """Group a flat word stream and its EEG embeddings into sentences.

    ``labels`` is scanned in order. Every ``"SOS"`` entry closes the sentence
    collected so far (if it is non-empty) and starts a new one; any other entry
    is appended to the current sentence. The sentence still open when the
    input ends is closed as well, so the last sentence is not lost when there
    is no trailing ``"SOS"``.

    For each closed sentence of ``k`` words, the next ``k`` items of
    ``EEG_embeddings`` are sliced off as that sentence's EEG segment.

    NOTE(review): the slicing only skips real words, so it assumes
    ``EEG_embeddings`` holds no entries for the ``"SOS"`` markers themselves.
    TODO confirm against the code that wrote the pickle.

    Args:
        labels: Flat sequence of word labels with ``"SOS"`` sentence markers.
        EEG_embeddings: Sliceable sequence of per-word EEG embeddings.

    Returns:
        ``(Sentences, EEG_Sentences)``: a list of word lists and a parallel
        list of the matching ``EEG_embeddings`` slices.
    """
    sentences = []
    eeg_sentences = []
    current_sentence = []
    eeg_index = 0

    def _close_current_sentence():
        """Store the open sentence with its EEG slice; no-op when empty."""
        nonlocal current_sentence, eeg_index
        if not current_sentence:
            return
        sentence_length = len(current_sentence)
        sentences.append(current_sentence)
        eeg_sentences.append(EEG_embeddings[eeg_index:eeg_index + sentence_length])
        eeg_index += sentence_length
        current_sentence = []

    for word in labels:
        if word == "SOS":
            # A start-of-sentence marker ends whatever came before it.
            _close_current_sentence()
        else:
            current_sentence.append(word)

    # The last sentence has no following "SOS", so close it explicitly.
    _close_current_sentence()

    return sentences, eeg_sentences

In [13]:
def pad_sentences(EEG_embeddings, max_length, pad_shape=(105, 8)):
    """Pad every EEG sentence with zero arrays up to ``max_length`` words.

    Each sentence is copied into a new list before padding, so the caller's
    sentences are left unmodified. Sentences that already have ``max_length``
    or more words are returned (as copies) without truncation.

    Args:
        EEG_embeddings: Sequence of sentences, each a sequence of per-word
            EEG arrays.
        max_length: Target number of words per sentence.
        pad_shape: Shape of the all-zero array appended for each missing word.
            Defaults to ``(105, 8)``.

    Returns:
        A list with one padded list of arrays per input sentence.
    """
    padded_EEG_sentences = []
    for sentence in EEG_embeddings:
        # Copy first: appending to ``sentence`` itself would silently grow the
        # caller's data.
        padded = list(sentence)
        missing = max_length - len(padded)
        if missing > 0:
            # A fresh zero array per slot, so padded positions never alias.
            padded.extend(np.zeros(pad_shape) for _ in range(missing))
        padded_EEG_sentences.append(padded)
    return padded_EEG_sentences

In [14]:
def reshape_data(X):
    """Flatten every per-word array in ``X`` and stack the result.

    Each array inside each sentence is reshaped to 1-D with ``reshape(-1)``;
    the nested list is then converted with ``np.array``.

    Args:
        X: Sequence of sentences, each a sequence of NumPy arrays.

    Returns:
        ``np.array`` built from the nested list of flattened arrays.
    """
    flattened_sentences = [
        [word_array.reshape(-1) for word_array in sentence]
        for sentence in X
    ]
    return np.array(flattened_sentences)

In [15]:
# Directory holding the pickled EEG/text pairs. The default is the original
# author's local checkout; set the EEG_DATA_DIR environment variable to point
# the notebook at the data on another machine instead of editing this cell.
EEG_DATA_DIR = os.environ.get(
    "EEG_DATA_DIR",
    r"C:\Users\gxb18167\PycharmProjects\EEG-To-Text\SIGIR_Development\EEG-GAN",
)
train_path = os.path.join(EEG_DATA_DIR, "EEG_Text_Pairs_Sentence.pkl")
test_path = os.path.join(EEG_DATA_DIR, "Test_EEG_Text_Pairs_Sentence.pkl")




In [16]:
# Training split: load the flat word-level data, then group it into sentences.
EEG_word_level_embeddings, EEG_word_level_labels = read_EEG_embeddings_labels(train_path)
EEG_word_level_sentences, EEG_sentence_embeddings = get_sentences_EEG(
    EEG_word_level_labels,
    EEG_word_level_embeddings,
)

# Test split: same two steps on the held-out file.
Test_EEG_word_level_embeddings, Test_EEG_word_level_labels = read_EEG_embeddings_labels(test_path)
Test_EEG_word_level_sentences, Test_EEG_sentence_embeddings = get_sentences_EEG(
    Test_EEG_word_level_labels,
    Test_EEG_word_level_embeddings,
)



In [17]:
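# TODO: load the real sentence-level labels here; until a label file exists,
# the next cell generates random placeholder labels instead.
#label_path = "insert here"
#train_labels, test_labels = read_EEG_embeddings_labels(label_path)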

In [18]:
# Placeholder binary labels (one per sentence) used until real labels exist.
# They come from a dedicated, seeded generator so that Restart & Run All
# reproduces the same labels and other users of the global `random` state
# are unaffected.
FAKE_LABEL_SEED = 42
_fake_label_rng = random.Random(FAKE_LABEL_SEED)

fake_train_labels = [_fake_label_rng.choice([0, 1]) for _ in range(len(EEG_word_level_sentences))]
fake_test_labels = [_fake_label_rng.choice([0, 1]) for _ in range(len(Test_EEG_word_level_sentences))]

In [19]:
# Longest sentence across BOTH splits. Using the training split alone leaves
# any longer test sentence unpadded, which makes the stacked test array ragged.
max_length = max(
    len(sentence)
    for split_sentences in (EEG_word_level_sentences, Test_EEG_word_level_sentences)
    for sentence in split_sentences
)

In [20]:
# Features: pad each sentence to `max_length` words, then flatten every word
# array and stack the sentences into a single array per split.
X_train = reshape_data(pad_sentences(EEG_sentence_embeddings, max_length))
X_test = reshape_data(pad_sentences(Test_EEG_sentence_embeddings, max_length))

# Targets: one-hot encode the placeholder labels of each split.
train_labels = encode_labels(fake_train_labels)
test_labels = encode_labels(fake_test_labels)