In [2]:
import os
import struct
import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from catboost import CatBoostClassifier

from DataLoader import DataLoader
from DataLoader import DataHolder
data_loader = DataLoader()

Подготовка EEG

In [3]:
file_to_meta = {}
file_to_data = {}
folder_path = '../eeg/wernicke'

for i, filename in enumerate(os.listdir(folder_path)):
    if filename.endswith('.BYT'):
        file_path = os.path.join(folder_path, filename)
        eeg_raw_data = data_loader.read_eeg(file_path)
        meta = data_loader.parse_eeg(eeg_raw_data)
        if len(meta) > 0:
            file_to_meta[filename] = meta
            file_to_data[filename] = eeg_raw_data

In [4]:
def parse_matrix(record_meta, record_data, idx):
    start, end, emotion = record_meta[idx]
    matrix = np.zeros((9, end-start))
    for i in range(start, end):
        _, _, _, values = record_data[i]
        for j, value in enumerate(values):
            matrix[j][i-start] = value
    return matrix, emotion

In [5]:
record_name = 'RechLn&Bueva1.BYT'
record_meta = file_to_meta[record_name]
record_data = file_to_data[record_name]
matrix, emotion = parse_matrix(record_meta, record_data, 0)
matrix.shape, emotion

((9, 5600), 'отвращение')

Подготовка компонент проекции

In [6]:
proj_df_list, proj_raw_data, proj_stat_data, proj_stat_shared_data =\
    data_loader.read_projections('../projections')
index_to_features = data_loader.get_preprocessed_data(proj_raw_data)

Авторегрессионные модели: Использование авторегрессионных моделей, таких как ARIMA (Autoregressive Integrated Moving Average), для предсказания последующих значений временного ряда, и использование параметров модели в качестве эмбеддингов.

In [7]:
import warnings
from statsmodels.tsa.arima.model import ARIMA

warnings.filterwarnings("ignore")

EEG

In [9]:
def get_eeg_embeddings(record_name, index):
    record_meta = file_to_meta[record_name]
    record_data = file_to_data[record_name]
    
    matrix, emotion = parse_matrix(record_meta, record_data, index)
    matrix.shape, emotion

    embeddings_eeg = []
    num_time_series = matrix.shape[0]
    order = (1, 1, 1)

    for i in range(num_time_series):
        time_series = matrix[i, :]
        model = ARIMA(time_series, order=order)
        fit_model = model.fit()
        embedding = fit_model.params
        embeddings_eeg.append(embedding)

    embeddings_eeg = np.array(embeddings_eeg)
    return embeddings_eeg

In [10]:
filename_to_index_to_embeddings = {}

for filename, meta in tqdm(file_to_meta.items()):
    filename_to_index_to_embeddings[filename] = {}
    for index in range(len(meta)):
        filename_to_index_to_embeddings[filename][index] = get_eeg_embeddings(filename, index)

100%|██████████| 50/50 [1:50:40<00:00, 132.82s/it]


In [8]:
import pickle

# with open('filename_to_index_to_embeddings.pkl', 'wb') as f:
#     pickle.dump(filename_to_index_to_embeddings, f)

with open('filename_to_index_to_embeddings.pkl', 'rb') as f:
   filename_to_index_to_embeddings = pickle.load(f)

In [9]:
for filename, meta in file_to_meta.items():

    index_to_embeddings = filename_to_index_to_embeddings[filename]
    
    for index, (_, _, emotion) in enumerate(meta):
        eeg_embeddings = index_to_embeddings[index]
        print(emotion, eeg_embeddings.shape)

    break

гнев (9, 3)
радость (9, 3)
тревога (9, 3)
нейтрально (9, 3)
удивление (9, 3)
удивление (9, 3)
радость (9, 3)
нейтрально (9, 3)
застенчивость (9, 3)
гнев (9, 3)
горе (9, 3)
отвращение (9, 3)
удивление (9, 3)
презрение (9, 3)
презрение (9, 3)
застенчивость (9, 3)
вина (9, 3)
вина (9, 3)
радость (9, 3)
тревога (9, 3)
вина (9, 3)
гнев (9, 3)
нейтрально (9, 3)


PROJECTION

In [10]:
emotion_index_to_component_embeddings = {}

order = (1, 1, 1)
for emotion_index, components in tqdm(index_to_features.items()):
    proj_embeddings = []
    for component_index, component in enumerate(index_to_features[0]):
        model = ARIMA(component, order=order)
        fit_model = model.fit()
        embedding = fit_model.params
        proj_embeddings.append(embedding)
    emotion_index_to_component_embeddings[emotion_index] = np.array(proj_embeddings)

100%|██████████| 12/12 [00:06<00:00,  1.77it/s]


In [11]:
for emotion_index, component_embedings in emotion_index_to_component_embeddings.items():
    emotion = DataHolder.index_to_russian[emotion_index]
    print(emotion, component_embedings.shape)

горе (15, 3)
презрение (15, 3)
любовь (15, 3)
радость (15, 3)
нейтрально (15, 3)
вина (15, 3)
тревога (15, 3)
застенчивость (15, 3)
ужас (15, 3)
удивление (15, 3)
отвращение (15, 3)
гнев (15, 3)


Матрица схожестей

In [12]:
from sklearn.metrics.pairwise import cosine_similarity

def find_nearest_neighbor(embedding_A, embedding_B):
    normalized_A = embedding_A / np.linalg.norm(embedding_A, axis=1, keepdims=True)
    normalized_B = embedding_B / np.linalg.norm(embedding_B, axis=1, keepdims=True)
    similarity_matrix = cosine_similarity(normalized_A, normalized_B)
    nearest_neighbors_indices = np.argmax(similarity_matrix, axis=1)
    return nearest_neighbors_indices

In [13]:
arima_nearest_matrix = np.zeros((9, 15))

for filename, meta in tqdm(file_to_meta.items()):

    index_to_embeddings = filename_to_index_to_embeddings[filename]

    for index, (_, _, emotion) in enumerate(meta):
        eeg_embeddings = index_to_embeddings[index]
        proj_embeddings = emotion_index_to_component_embeddings[DataHolder.russian_to_index[emotion]]

        nearest_neighbors_indices = find_nearest_neighbor(eeg_embeddings, proj_embeddings)
        for eeg_index, proj_index in enumerate(nearest_neighbors_indices):
            arima_nearest_matrix[eeg_index, proj_index] += 1

100%|██████████| 50/50 [00:00<00:00, 222.85it/s]


In [14]:
arima_nearest_matrix

array([[1355.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,  185.,    0.,    0.,    0.],
       [1333.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,  207.,    0.,    0.,    0.],
       [1350.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,  190.,    0.,    0.,    0.],
       [1396.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,  144.,    0.,    0.,    0.],
       [1372.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,  168.,    0.,    0.,    0.],
       [1323.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,  217.,    0.,    0.,    0.],
       [1490.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,   50.,    0.,    0.,    0.],
       [1433.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
           0.,    0.,  107.,    0.,    0.,    0.],
       [1346.,    0.,   

In [76]:
fig = go.Figure(data=go.Heatmap(z=arima_nearest_matrix, colorscale='Viridis'))

# Добавление значений в ячейках
for i in range(len(arima_nearest_matrix)):
    for j in range(len(arima_nearest_matrix[0])):
        fig.add_annotation(
            x=j, y=i,
            text=f'{int(arima_nearest_matrix[i, j])}',
            showarrow=False,
            font=dict(color='white'),
            xref='x1', yref='y1'
        )

# Настройка макета
fig.update_layout(
    xaxis=dict(title='Номер компоненты проекции', tickvals=list(range(0, len(matrix[0]) + 1))),
    yaxis=dict(title='Индекс сигнала ЭЭГ', tickvals=list(range(0, len(matrix) + 1))),
    title='Число похожестей между сигналами ЭЭГ и компонентами проекции',
    height=650,
    width=850,
    #margin=dict(l=50, r=50, b=50, t=50),
    #xaxis_range=[-0.5, len(matrix[0]) + 0.5],
    #yaxis_range=[-0.5, len(matrix) + 0.5]
)

fig.show()

In [15]:
fig = go.Figure(data=go.Heatmap(
    z=arima_nearest_matrix,
    colorscale='Viridis',
    text=arima_nearest_matrix
))

for i in range(len(arima_nearest_matrix)):
    for j in range(len(arima_nearest_matrix[0])):
        fig.add_annotation(
            x=j, y=i,
            text=f'{int(arima_nearest_matrix[i, j])}',
            showarrow=False,
            font=dict(color='white'),
            xref='x1', yref='y1'
        )

x_tickvals = np.arange(arima_nearest_matrix.shape[1])
x_ticktext = [str(i + 1) for i in range(arima_nearest_matrix.shape[1])]

y_tickvals = np.arange(arima_nearest_matrix.shape[0])
y_ticktext = [str(i + 1) for i in range(arima_nearest_matrix.shape[0])]

fig.update_layout(
    title='Число похожестей между сигналами ЭЭГ и компонентами проекций',
    xaxis=dict(title='Номер компоненты проекции', tickvals=x_tickvals, ticktext=x_ticktext),
    yaxis=dict(title='Номер сигнала ЭЭГ', tickvals=y_tickvals, ticktext=y_ticktext),
    width=750,
    height=550
)

fig.show()