In [None]:
import sys
  
# append the path of the
# parent directory
sys.path.append("..")

import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation
import json, csv
import mne
import scipy
import pandas
import copy
import mbt_pyspt as mbt
from mbt_pyspt.models.eegdata import EEGData
from mbt_pyspt.models.mybraineegdata import MyBrainEEGData
from mbt_pyspt.modules.preprocessingflow import PreprocessingFlow
from mbt_pyspt.modules.featuresextractionflow import FeaturesExtractionFlow
from utils.em_data_loader import generate_participants_datasets, generate_participants_events, load_dataset
from utils.em_plotting import plot_annotations, plot_avg_annotations, plot_study, plot_signal, plot_trial_annotations, plot_avg_annotations_all_participants, plot_linear_regression
from preprocessing.em_preprocessing import participant_avg_annotation_windows, preprocess_em_participant, compute_participant_features
from preprocessing.em_qi_cleaner import qi_data_removal
from classification.em_linear_regression import lin_regression, valence_and_neuromarker, arousal_and_neuromarker, liking_familiarity_correlation
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection  import train_test_split
from pprint import pprint
from statistics import mean
from meegkit.asr import ASR
from meegkit.utils.matrix import sliding_window

print("All libraries successfully loaded!")

### Data Loading & Preprocessing
Loads the entire dataset into memory and stores the participant keys for later use. **Warning:** this step is slow; run it only when necessary.

In [None]:
# Load the entire Emotion-Music dataset
dataset = load_dataset()
participants = dataset.keys()
print(participants)

# Preprocessing steps handed to preprocess_em_participant as (step_name, params) pairs.
# Disabled steps are kept commented out for reference.
list_pp = [
    #('notch_filter', {"freqs": [50,100]}),
    #('filter_mne', { "l_freq": 0.01, "h_freq": 30}),
    ('remove_dc_sec_by_sec', None), 
    #('median_filter', None),
    #('sliding_split_eeg_data', { 'buf': 5, 'sliding_step': 1}),
    #('interpolate_all_qc', None)
    ]

# Band-pass and notch settings forwarded to preprocess_em_participant
# (presumably frequencies in Hz -- confirm against the helper).
bpass_freqs = {'l_freq': 0.1, 'h_freq': 20}
notch_freqs = (50, 100)
# Toggle for the qi_data_removal pass applied after preprocessing.
qi_cleaning = True

# Preprocessed participants keyed by participant id. The plotting cells further
# down read `prep_dataset`; it was never assigned before, so they raised a
# NameError on a fresh kernel.
prep_dataset = {}
for participant_id in dataset:
    prep_participant = preprocess_em_participant(
        dataset[participant_id],
        list_pp,
        bpass_freqs=bpass_freqs,
        notch_freqs=notch_freqs,
        asr_cleaning=False,
    )
    if qi_cleaning:
        # The return value is not used, so the helper is expected to update
        # prep_participant in place -- TODO confirm.
        qi_data_removal(prep_participant, trial_duration=60, qi_window_size=6, qi_threshold=0.35, allowed_loss=50)
    # `dataset` is still overwritten exactly as before, so any cell that reads
    # it keeps seeing the preprocessed participants.
    dataset[participant_id] = prep_participant
    prep_dataset[participant_id] = prep_participant

### Data Plotting

In [None]:
# Run this cell ONLY when the annotations need to be visualised.
# NOTE(review): `window_size` and `prep_dataset` are not assigned in this cell;
# they must already exist in the kernel -- confirm where they are defined.
n_windows = 60 / window_size  # true division
for participant_id, participant in prep_dataset.items():
    # Replace the stored participant with the helper's result, then plot it.
    averaged = participant_avg_annotation_windows(participant, n_windows)
    prep_dataset[participant_id] = averaged
    plot_avg_annotations(averaged)


In [None]:
# Single trial annotations plotting: the same trial is drawn twice, first from
# the 'x'/'y' series and then from the 'c_x'/'c_y' series.
participant_id = 's010701'
trial_label = 'EO/class_1_A'
trial_annotations = prep_dataset[participant_id]['trials'][trial_label]['annotations']
for x_key, y_key in (('x', 'y'), ('c_x', 'c_y')):
    x = trial_annotations[x_key]
    y = trial_annotations[y_key]
    plot_trial_annotations(x, y, trial_label, animated=False)

In [None]:
%matplotlib notebook
# Animated trial annotations plotting

participant_id = 's010701'
trial_label = 'EO/class_3_A'
valence = prep_dataset[participant_id]['trials'][trial_label]['annotations']['x']
arousal = prep_dataset[participant_id]['trials'][trial_label]['annotations']['y']
marker = 'x'
color = 'red'
label = 'Annotations'
if trial_label.__contains__('class_1'):
    marker = '^'
    color = 'green'
    label= 'HAHV'
if trial_label.__contains__('class_2'):
    marker = '>'
    color = 'cyan'
    label= 'LAHV'
if trial_label.__contains__('class_3'):
    marker = 'v'
    color = 'blue'
    label= 'LALV'
if trial_label.__contains__('class_4'):
    marker = '<'
    color = 'orange'
    label= 'HALV'
    
fig, ax = plt.subplots()
x, y = [],[]
sc = ax.scatter(x, y, c=color, marker=marker, label=label)
plt.xlim(-0.5, 0.5)
plt.ylim(-0.5, 0.5)
plt.grid(True)
def animate(i):
    x.append(valence[i])
    y.append(arousal[i])
    sc.set_offsets(np.c_[x,y])

ani = matplotlib.animation.FuncAnimation(fig, animate, 
                frames=len(valence), interval=10, repeat=True, blit=True) 
plt.show()

In [None]:
# Hand the whole preprocessed dataset to the all-participants plotting helper.
# NOTE(review): presumably expects the per-participant averaging cell to have
# been run on `prep_dataset` first -- confirm against utils.em_plotting.
plot_avg_annotations_all_participants(prep_dataset)


In [None]:
# K-Means example: draw synthetic blobs and overlay the k-means++ seed points.
# These imports stay local so the cell remains runnable on its own.
from sklearn.cluster import kmeans_plusplus
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

# Generate sample data
n_samples = 4000
n_components = 4

X, y_true = make_blobs(n_samples=n_samples,
                       centers=n_components,
                       cluster_std=0.60,
                       random_state=0)


# Calculate seeds from kmeans++ (one seed per generated blob; previously the
# count was a second hard-coded 4 that could drift from n_components).
centers_init, indices = kmeans_plusplus(X, n_clusters=n_components,
                                        random_state=0)

# Plot init seeds along side sample data.
# A fresh figure is created here: plt.figure(1) re-activates an already open
# figure number 1, so with an interactive backend this plot could end up drawn
# on top of an earlier figure. Distinct names avoid clobbering `fig`/`ax` from
# other cells.
kmeans_fig, kmeans_ax = plt.subplots()
colors = ['#4EACC5', '#FF9C34', '#4E9A06', 'm']

for k, col in enumerate(colors):
    # Boolean mask selecting the samples generated for blob k.
    cluster_data = y_true == k
    kmeans_ax.scatter(X[cluster_data, 0], X[cluster_data, 1],
                      c=col, marker='.', s=10)

kmeans_ax.scatter(centers_init[:, 0], centers_init[:, 1], c='b', s=50)
kmeans_ax.set_title("K-Means++ Initialization")
kmeans_ax.set_xticks([])
kmeans_ax.set_yticks([])
plt.show()