In [14]:
import pickle
import numpy as np
from scipy.spatial import distance

In [3]:
# Load the two objects that were pickled back-to-back into the pairs file.
# The first object read is bound to EEG_word_level_embeddings and the second to
# EEG_word_level_labels, so the read order here must match the order they were dumped in.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell fails
# on any other machine until the path is adjusted.
# NOTE(review): pickle.load can execute arbitrary code - only open trusted files.
with open(r"C:\Users\gxb18167\PycharmProjects\EEG-To-Text\SIGIR_Development\EEG-GAN\EEG_Text_Pairs.pkl", 'rb') as file:
    EEG_word_level_embeddings = pickle.load(file)
    EEG_word_level_labels = pickle.load(file)

In [6]:
# Group the EEG embeddings by word label: every key is a word and every value is
# the list of all embeddings recorded for that word, in their original order.
EEG_word_level_dict = {}
for i in range(len(EEG_word_level_labels)):
    # setdefault installs an empty list the first time a word is encountered
    EEG_word_level_dict.setdefault(EEG_word_level_labels[i], []).append(EEG_word_level_embeddings[i])


In [24]:
def average_eeg_segments(eeg_segments):
    """
    Average a collection of EEG segments element-wise.

    Parameters:
        eeg_segments (list of array-like): Segments to average. They are combined
            with np.stack, so they must all share one shape.

    Returns:
        array-like: Element-wise mean over the segments, shaped like one segment.
    """
    # Stacking adds a leading "segment" axis; averaging over it removes it again.
    return np.stack(eeg_segments, axis=0).mean(axis=0)

# Collapse each word's list of EEG segments into a single average segment.
# Keys are the words of EEG_word_level_dict; values are the averaged arrays.
average_segments_dict = {
    word: average_eeg_segments(segments)
    for word, segments in EEG_word_level_dict.items()
}

In [27]:
def convert_to_probability_distribution(eeg_segment):
    """
    Convert an EEG segment to a probability distribution.

    The segment is flattened, z-score normalised, and passed through a softmax,
    so the result is non-negative and sums to 1.

    Parameters:
        eeg_segment (array-like): EEG segment data of any shape (arrays or
            nested lists).

    Returns:
        numpy.ndarray: 1-D probability distribution with one entry per value of
            the flattened segment. An empty segment yields an empty array; a
            constant segment (zero standard deviation) yields the uniform
            distribution instead of NaNs.
    """
    # asarray lets plain lists through; ravel flattens to 1-D
    flattened_segment = np.asarray(eeg_segment).ravel()

    # Nothing to normalise: return an empty distribution rather than warn on NaNs
    if flattened_segment.size == 0:
        return flattened_segment.astype(float)

    spread = np.std(flattened_segment)

    # A constant segment would divide by zero below; every z-score is
    # conceptually 0, whose softmax is the uniform distribution.
    if spread == 0:
        return np.full(flattened_segment.shape, 1.0 / flattened_segment.size)

    # Z-score normalisation
    normalized_segment = (flattened_segment - np.mean(flattened_segment)) / spread

    # Softmax, shifted by the maximum so np.exp cannot overflow; the shift
    # cancels in the ratio and leaves the probabilities unchanged.
    exponentials = np.exp(normalized_segment - np.max(normalized_segment))
    probabilities = exponentials / np.sum(exponentials)

    return probabilities

In [60]:
# Map every word to the probability distribution derived from its average EEG segment.
probability_distribution_dict = {
    word: convert_to_probability_distribution(segment)
    for word, segment in average_segments_dict.items()
}


In [29]:
def compute_js_distance(p, q):
    """
    Compute the Jensen-Shannon distance between two probability distributions.

    The Jensen-Shannon distance is the square root of the Jensen-Shannon
    divergence, where the divergence is 0.5 * KL(p || m) + 0.5 * KL(q || m)
    and m = 0.5 * (p + q).

    Parameters:
        p (array-like): Probability distribution (or non-negative weights).
        q (array-like): Probability distribution (or non-negative weights),
            with the same shape as p.

    Returns:
        float: Jensen-Shannon distance between p and q (natural-log base), 0 for
            identical distributions and sqrt(ln 2) for disjoint ones.
    """
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)

    # scipy's jensenshannon already returns the JS *distance* between its two
    # arguments and builds the mixture m internally, so it must be called on
    # (p, q) directly. Calling it on (p, m) and (q, m) and averaging, as if it
    # were a KL divergence, measures something else. It also rescales inputs
    # that do not sum to 1, so no explicit normalisation is needed here.
    return distance.jensenshannon(p, q)

In [64]:
# Random baseline: for every word, draw Gaussian noise with the same mean,
# standard deviation and shape as the word's average EEG segment, convert the
# noise to a probability distribution, and record the distance that
# compute_js_distance reports between the word's distribution and the noise.

# A dedicated, seeded generator makes the baseline identical on every run.
RANDOM_BASELINE_SEED = 0
rng = np.random.default_rng(RANDOM_BASELINE_SEED)

random_distance_dict = {}

for word, segment in probability_distribution_dict.items():
    # Look the average segment up once instead of three times
    word_average = average_segments_dict[word]

    noise = rng.normal(
        loc=np.mean(word_average),
        scale=np.std(word_average),
        size=word_average.shape,
    )
    noise_distribution = convert_to_probability_distribution(noise)

    random_distance_dict[word] = compute_js_distance(segment, noise_distribution)

In [71]:
import nltk
nltk.download('punkt')
from gensim.models import Word2Vec
from nltk.tokenize import word_tokenize


def create_word_label_embeddings(Word_Labels_List):
    """
    Train a Word2Vec model on the word labels and embed every label.

    Each label is wrapped in its own one-token sentence before training.

    Parameters:
        Word_Labels_List (iterable of str): Word labels to embed.

    Returns:
        tuple: (Embedded_Word_labels, word_embeddings) where
            Embedded_Word_labels is a list holding the vector of each label in
            Word_Labels_List, in the same order, and word_embeddings is a dict
            mapping each distinct word to its vector.
    """
    # Materialise once so the labels can be traversed twice
    labels = list(Word_Labels_List)

    # NOTE(review): one-token sentences give Word2Vec no context words to learn
    # from - confirm that this training setup is intended.
    tokenized_words = [[label] for label in labels]
    model = Word2Vec(sentences=tokenized_words, vector_size=50, window=5, min_count=1, workers=4)
    word_embeddings = {word: model.wv[word] for word in model.wv.index_to_key}
    print("Number of word embeddings:", len(word_embeddings))

    # Embed the labels that were passed in rather than a notebook-level global,
    # so the function works for any label list and in a fresh kernel.
    Embedded_Word_labels = [word_embeddings[word] for word in labels]

    return Embedded_Word_labels, word_embeddings

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\gxb18167\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [72]:
# Train the label embeddings: the first result holds one vector per entry of
# EEG_word_level_labels, the second is the word -> vector lookup table.
Embedded_Word_labels, word_embeddings = create_word_label_embeddings(EEG_word_level_labels)

Number of word embeddings: 5860
