In [33]:
import os
import pickle
import random
import re

import numpy as np
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [2]:
def read_EEG_embeddings_labels(path):
    """Load two consecutively pickled objects from ``path``.

    The file is read in binary mode and unpickled twice: the first record is
    returned as the embeddings, the second as the labels.

    Args:
        path: Location of the pickle file to read.

    Returns:
        tuple: ``(embeddings, labels)`` in the order they were stored.
    """
    # SECURITY: unpickling can execute arbitrary code -- only open files that
    # come from a trusted source.
    with open(path, 'rb') as handle:
        # Evaluated left to right, so the first load yields the embeddings.
        embeddings, word_labels = pickle.load(handle), pickle.load(handle)
    return embeddings, word_labels

In [21]:
def encode_labels(y):
    """Turn a collection of class labels into a one-hot matrix.

    A new ``LabelEncoder`` is fitted on ``y`` on every call, so the integer
    code given to each label depends only on the values present in ``y``.
    The integer codes are then expanded with ``to_categorical``.

    Args:
        y: Iterable of class labels.

    Returns:
        The categorical (one-hot) representation produced by ``to_categorical``.
    """
    integer_codes = LabelEncoder().fit_transform(y)
    return to_categorical(integer_codes)

In [7]:
def get_sentences_EEG(labels, EEG_embeddings):
    """Group a flat word sequence into sentences with matching EEG segments.

    ``labels`` is scanned in order. Every ``"SOS"`` entry closes the sentence
    collected so far and starts a new one; the end of the input closes the
    last sentence. For each closed sentence of ``k`` words, the next ``k``
    items of ``EEG_embeddings`` are sliced off as that sentence's segment, so
    embeddings are consumed one per non-``"SOS"`` label.

    Args:
        labels: Iterable of word labels, with ``"SOS"`` marking sentence starts.
        EEG_embeddings: Sliceable sequence of per-word embeddings.
            NOTE(review): assumed to contain no entries for the ``"SOS"``
            markers themselves -- confirm against the code that writes the
            pickle.

    Returns:
        tuple: ``(Sentences, EEG_Sentencs)`` -- a list of word lists and a
        parallel list of embedding slices. Empty sentences (for example
        between two consecutive markers) are skipped.
    """
    Sentences = []
    EEG_Sentencs = []
    current_sentence = []
    EEG_index = 0

    # The trailing "SOS" sentinel closes the final sentence too; without it
    # the words after the last marker would be silently dropped.
    for word in [*labels, "SOS"]:
        if word == "SOS":
            if len(current_sentence) > 0:
                sentence_length = len(current_sentence)
                Sentences.append(current_sentence)
                # Take the next `sentence_length` embeddings for this sentence.
                EEG_Sentencs.append(EEG_embeddings[EEG_index:EEG_index + sentence_length])
                EEG_index += sentence_length
                current_sentence = []
        else:
            current_sentence.append(word)

    return Sentences, EEG_Sentencs

In [8]:
# Locations of the pickled EEG/text pair files. The defaults are the original
# author's local Windows paths; set EEG_TRAIN_PATH / EEG_TEST_PATH in the
# environment to point at the files on another machine without editing the
# notebook.
train_path = os.environ.get(
    "EEG_TRAIN_PATH",
    r"C:\Users\gxb18167\PycharmProjects\EEG-To-Text\SIGIR_Development\EEG-GAN\EEG_Text_Pairs_Sentence.pkl",
)
test_path = os.environ.get(
    "EEG_TEST_PATH",
    r"C:\Users\gxb18167\PycharmProjects\EEG-To-Text\SIGIR_Development\EEG-GAN\Test_EEG_Text_Pairs_Sentence.pkl",
)




In [9]:
# Load the pickled word-level EEG embeddings and word labels for the train
# and test files.
EEG_word_level_embeddings, EEG_word_level_labels = read_EEG_embeddings_labels(train_path)
Test_EEG_word_level_embeddings, Test_EEG_word_level_labels = read_EEG_embeddings_labels(test_path)

# Group the flat word sequences into sentences (split on "SOS" markers) and
# slice the embeddings into the matching per-sentence segments.
EEG_word_level_sentences, EEG_sentence_embeddings = get_sentences_EEG(EEG_word_level_labels, EEG_word_level_embeddings)
Test_EEG_word_level_sentences, Test_EEG_sentence_embeddings = get_sentences_EEG(Test_EEG_word_level_labels, Test_EEG_word_level_embeddings)



In [10]:
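# TODO: load the real sentence labels here. Until a label file exists, the
# following cells generate random placeholder labels instead.
#label_path = "insert here"
#train_labels, test_labels = read_EEG_embeddings_labels(label_path)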

In [17]:
# Placeholder binary labels, one per sentence, standing in for real labels.
# They are drawn from a dedicated seeded generator so a kernel restart
# reproduces the same labels, and the global `random` state is left untouched.
_fake_label_rng = random.Random(0)
fake_train_labels = [_fake_label_rng.choice([0, 1]) for _ in range(len(EEG_word_level_sentences))]
fake_test_labels = [_fake_label_rng.choice([0, 1]) for _ in range(len(Test_EEG_word_level_sentences))]

In [22]:
# One-hot encode the placeholder labels for the train and test sentences.
# NOTE(review): each encode_labels call fits its own LabelEncoder, so the two
# encodings are derived independently from whatever label values each list
# happens to contain.
train_labels = encode_labels(fake_train_labels)
test_labels = encode_labels(fake_test_labels)

In [28]:
# Number of words in the longest training sentence (test sentences are not
# considered here).
max_length = max(map(len, EEG_word_level_sentences))

array([[0.19429664, 0.17923741, 0.36307213, 0.50747943, 0.62206709,
        0.51871073, 0.42868289, 0.06850591],
       [0.28140983, 0.57861644, 1.08024967, 0.36859021, 0.36467823,
        0.41722685, 1.14768839, 0.55932534],
       [0.4871715 , 0.79681689, 0.51210332, 0.40454227, 0.55180544,
        0.43953407, 0.75510144, 0.38622764],
       [0.43004686, 0.29673383, 0.3575944 , 0.36961138, 0.3506383 ,
        0.85081506, 1.26278675, 0.41491231],
       [0.3718234 , 0.45663175, 0.42179471, 1.03561676, 0.85461956,
        0.43418464, 0.46566522, 0.33808869],
       [0.49680769, 0.46951395, 0.50899029, 0.15184164, 1.6930244 ,
        0.76735342, 0.57302761, 0.94475245],
       [0.7187323 , 1.07659662, 0.72896671, 1.66536224, 1.39709103,
        1.28687668, 1.10441923, 1.43177247],
       [1.32517946, 1.66541123, 1.60669196, 1.8083328 , 1.39522731,
        1.99193358, 1.38413417, 1.54831302],
       [1.34435618, 1.0585711 , 0.49228552, 1.85182977, 1.85807848,
        1.41647136, 1.353929

In [35]:
def pad_sentences(EEG_embeddings, max_length, pad_shape=(105, 8)):
    """Pad each EEG sentence with all-zero arrays up to ``max_length`` items.

    Args:
        EEG_embeddings: Sequence of sentences. Any sentence shorter than
            ``max_length`` must be a mutable list, because padding is
            appended to it.
        max_length: Target number of items per sentence.
        pad_shape: Shape of every zero padding array. Defaults to
            ``(105, 8)``, the value that was previously hard-coded.

    Returns:
        list: The sentences in their original order. Short sentences are
        extended in place, so the returned list holds the same sentence
        objects that were passed in. Sentences already at or above
        ``max_length`` are left unchanged (they are not truncated).
    """
    padded_EEG_sentences = []
    for sentence in EEG_embeddings:
        padding_length = max_length - len(sentence)
        if padding_length > 0:
            # A fresh zero array per slot, so padded positions never share storage.
            sentence.extend(np.zeros(pad_shape) for _ in range(padding_length))
        padded_EEG_sentences.append(sentence)
    return padded_EEG_sentences



