# Loading Packages

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

from src.settings import Paths, Settings
from src.data_loader import VerbMemEEGDataLoader, PilotEEGDataLoader, CLEARDataLoader
from src.data_preprocess import DataPreprocessor
from src.visualization.EEG_vissualizer import visualize_erp, visualize_block_ERP, visualize_feature_box_plot, \
    visualize_block_features
from src.data_loader import EEGDataSet
from src.feature_extraction import FeatureExtractor
import numpy as np
import seaborn as sns

%load_ext autoreload
import copy
from src.experiments.utils.train_gplvm_utils import *
from sklearn.model_selection import KFold
from sklearn import linear_model
from sklearn.svm import SVC

# Load Settings

In [2]:
# Configuration cell: build the `settings` and `paths` objects that every later cell
# passes to the data loader and preprocessor. The calls are order-dependent
# (`Paths` is constructed from the already-loaded `settings`).
# Load settings from settings.json
settings = Settings()  # Initialize settings object
settings.load_settings()  # Load settings from a JSON file

# Set up paths for data
paths = Paths(settings)  # Initialize paths object with loaded settings
paths.load_device_paths()  # Load device-specific paths
paths.create_paths()  # Create any necessary file paths

Patient: all


# Load Data

In [3]:
# Load the pilot EEG recordings selected by `settings.patient`.
# Later cells read the per-recording objects from `dataset.all_patient_data` (a dict).
dataset = PilotEEGDataLoader(paths=paths, settings=settings)
dataset.load_data(patient_ids=settings.patient)

Subject 0 from 12: 101_toi1gng_2023-11-20_14-09-06_1 Load Data
Subject 1 from 12: 102_toi1gng_2023-11-14_13-59-28_1 Load Data
Subject 2 from 12: 103_toi1gng_2023-11-29_13-35-36_1 Load Data
Subject 3 from 12: 104_toi1gng_2023-11-30_14-45-53_1 Load Data
Subject 4 from 12: 106_toi1gng_2023-12-05_10-01-06_1 Load Data
Subject 5 from 12: 110_toi1gng2t_2024-01-05_13-10-01_1 Load Data
Subject 6 from 12: 111_toi1gng2t_2024-01-09_11-59-24_1 Load Data
Subject 7 from 12: 112_toi1gng2t_2024-02-02_13-58-25_1 Load Data
Subject 8 from 12: 113_toi1gng2t_2024-02-01_14-34-31_1 Load Data
Subject 9 from 12: 114_toi1gng2t_2024-01-23_14-25-05_1 Load Data
Subject 10 from 12: 115_toi1gng2t_2024-02-07_15-04-03_1 Load Data
Subject 11 from 12: 116_toi1gng_2023-12-07_14-09-10_1 Load Data


In [None]:
from scipy.io import savemat

# Export the correct, non-control trials of every loaded patient to a MATLAB .mat file.
# The loop walks the loaded recordings directly instead of a hard-coded range(12), so the
# cell keeps working when `settings.patient` selects a different number of subjects.
for single_patient_data in dataset.all_patient_data.values():
    label_df = pd.DataFrame(single_patient_data.labels)
    # Keep trials that were answered correctly and are not control ('ctl') stimuli.
    block_index = (label_df['is_correct'] == True) & (label_df['stim'] != 'ctl')
    selected_labels = label_df[block_index]

    data_dict = {
        'data': single_patient_data.data[block_index],
        'time_ms': single_patient_data.time_ms,
        'block_id': selected_labels['block_number'].values,
        'block_type': selected_labels['block_type'].values,
        'is_experienced': selected_labels['is_experienced'].values,
        'is_resp': selected_labels['is_resp'].values,
        'is_correct': selected_labels['is_correct'].values,
        'go_nogo': selected_labels['go_nogo'].values,
        'fs': single_patient_data.fs,
        'response_time': single_patient_data.response_time[block_index],
        'file_name': single_patient_data.file_name,
    }

    # The patient id is the part of the recording file name before the first underscore.
    patient_id = single_patient_data.file_name.split("_")[0]
    savemat(f'patient{patient_id}.mat', data_dict)

In [4]:
import pickle

# Destination folder of the exported pickles.
# NOTE(review): this is an absolute, machine-specific path; consider deriving it from `paths`.
export_dir = 'G:\\.shortcut-targets-by-id\\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\\Pilot1'

# Export the correct, non-control trials of every loaded patient to one pickle file each.
# Iterating over the loaded recordings replaces the hard-coded range(12), which raised
# IndexError whenever fewer than twelve patients were loaded.
for single_patient_data in dataset.all_patient_data.values():
    label_df = pd.DataFrame(single_patient_data.labels)
    # Keep trials that were answered correctly and are not control ('ctl') stimuli.
    block_index = (label_df['is_correct'] == True) & (label_df['stim'] != 'ctl')
    selected_labels = label_df[block_index]

    data_dict = {
        'data': single_patient_data.data[block_index],
        'time_ms': single_patient_data.time_ms,
        'block_id': selected_labels['block_number'].values,
        'block_type': selected_labels['block_type'].values,
        'is_experienced': selected_labels['is_experienced'].values,
        'is_resp': selected_labels['is_resp'].values,
        'is_correct': selected_labels['is_correct'].values,
        'go_nogo': selected_labels['go_nogo'].values,
        'fs': single_patient_data.fs,
        'response_time': single_patient_data.response_time[block_index],
        'file_name': single_patient_data.file_name,
    }

    # Define the file path (patient id = file name up to the first underscore)
    file_path = f'{export_dir}\\patient{single_patient_data.file_name.split("_")[0]}.pkl'

    # Open a file and save the dictionary using pickle
    with open(file_path, 'wb') as file:
        pickle.dump(data_dict, file)

    print(f"Data saved to {file_path}")

Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient101.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient102.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient103.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient104.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient106.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient110.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient111.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient112.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient113.pkl
Data saved to G:\.shortcut-targets-by-id\1cX90O_ArtclHDzk2LNzWWS9RYvVLVteA\Pilot1\patient114.pkl
Data saved to G:\.shortcut-tar

In [None]:
# Scratch inspection: response times of the trials selected by `block_index`.
# Depends on `single_patient_data` and `block_index` left behind by the last iteration
# of the export loop, so it only reflects the final patient processed there.
single_patient_data.response_time[block_index]

In [None]:
# Scratch inspection: the patient id, i.e. the file-name part before the first underscore.
# Uses whichever `single_patient_data` the previous cells left in the namespace.
single_patient_data.file_name.split("_")[0]


# Preprocess Data
1. remove_baseline(normalize=False, baseline_t_min=-1000)
2. low_pass_filter(cutoff=45, order=5)


In [None]:
# Run the project's preprocessing pipeline on the loaded recordings.
# NOTE(review): `dataset` is rebound to the returned object, so re-running this cell
# presumably preprocesses already-preprocessed data - confirm before re-executing.
data_preprocessor = DataPreprocessor(paths=paths, settings=settings)  # Initialize data preprocessor
dataset = data_preprocessor.preprocess(dataset)  # Apply preprocessing steps to the dataset

# Patient 101 Analysis
- Step 1: We filter based on the correct answers
- Step 2: We remove the ctl stimuli

In [None]:
# Work on the first recording in `dataset.all_patient_data`.
# copy.copy is a shallow copy: rebinding attributes on the copy leaves the dataset entry
# alone, but the underlying arrays are still shared with it.
single_patient_data = copy.copy(dataset.all_patient_data[list(dataset.all_patient_data.keys())[0]])
print(f"Selected patient is {single_patient_data.file_name.split('_')[0]}")
channels = single_patient_data.channel_names

## step 3
-  Plot ERP in each block

In [None]:
# Draw the block-wise ERP figure for each of the first ten channels of the selected patient.
n_channels_to_plot = len(channels[:10])
for idx in range(n_channels_to_plot):
    fig, _ = visualize_block_ERP(
        single_patient_data,
        stim='w',
        channel_idx=idx,
        axs=None,
        fig=None,
        show_plot=True,
    )

# Classify based on these features

# Analyze time features

In [None]:
def get_time_idx(time, start_time, end_time):
    """Return the indices of the samples in `time` closest to two target times.

    Args:
        time: Array of sample times (supports subtraction of a scalar).
        start_time: Target time whose nearest sample index is returned first.
        end_time: Target time whose nearest sample index is returned second.

    Returns:
        tuple: (start_index, end_index). When two samples are equally close,
        the earlier index wins.
    """
    nearest = [np.abs(time - target).argmin() for target in (start_time, end_time)]
    return nearest[0], nearest[1]

In [None]:
# Mean amplitude per trial and channel inside three post-stimulus windows.
# Window bounds are in the units of `time_ms`; the end index is exclusive in the slice.
erp_windows = (('n200', 150, 250), ('p300', 250, 500), ('post_300', 500, 750))

time_features = {}
for feature_name, window_start, window_end in erp_windows:
    start_index, end_index = get_time_idx(single_patient_data.time_ms, window_start, window_end)
    window_samples = single_patient_data.data[:, :, start_index:end_index]
    time_features[feature_name] = np.mean(window_samples, axis=-1)

In [None]:
# Show the per-block distribution of the window features for the first ten channels.
n_channels_to_plot = len(channels[:10])
for idx in range(n_channels_to_plot):
    visualize_block_features(
        single_patient_data,
        time_features,
        channel_idx=idx,
        stim='w',
        fig=None,
        axs=None,
        show_plot=True,
    )

# Training models

## 1. Prepare and split data

In [None]:
# One column per (window, channel) pair, named "<window> <channel>"; one row per trial.
training_features = {
    key + ' ' + ch: feature_matrix[:, ch_idx]
    for key, feature_matrix in time_features.items()
    for ch_idx, ch in enumerate(single_patient_data.channel_names)
}
training_features_df = pd.DataFrame(training_features)
training_features_df.head()

## 2. K fold training

In [None]:
def train_kfold(X, y, model):
    """Fit and score `model` with shuffled 5-fold cross-validation, printing per-fold metrics.

    Args:
        X: Feature table with one row per sample (pandas DataFrame or array-like).
        y: Binary labels aligned with the rows of `X`.
        model: Estimator exposing `fit(X, y)` and `predict(X)`. The same object is
            refit on every fold.

    Returns:
        list of float: The macro-averaged F1 score of each fold, in fold order.
    """
    # Imported here so the function does not depend on names leaked by a star import.
    from sklearn.metrics import classification_report, f1_score

    features = X.values if hasattr(X, 'values') else np.asarray(X)
    targets = np.asarray(y)

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    fold_scores = []
    for train_index, test_index in kf.split(features):
        model.fit(features[train_index], targets[train_index])

        # Binarise once so the F1 score and the report use the same decisions.
        # The original mixed '>= 0.5' and '> 0.5', which disagree when a regressor
        # (e.g. Lasso) predicts exactly 0.5.
        predictions = np.asarray(model.predict(features[test_index])) >= 0.5

        f1 = f1_score(targets[test_index], predictions, average='macro')
        print(f"F1-score: {f1 * 100:.2f}%")

        report = classification_report(y_true=targets[test_index], y_pred=predictions)
        print(report)
        fold_scores.append(f1)

    return fold_scores


## 3. Separate each block and train

In [None]:
from src.experiments.utils.train_gplvm_utils import *
from sklearn.model_selection import KFold

# Flatten the per-window feature matrices into one column per (window, channel) pair.
training_features = {}
for key in time_features.keys():
    for ch_idx, ch in enumerate(single_patient_data.channel_names):
        training_features[key + ' ' + ch] = time_features[key][:, ch_idx]
training_features_df = pd.DataFrame(training_features)

stim = 'w'
block_idx = np.unique(single_patient_data.labels['block_number'])
label_df = pd.DataFrame(single_patient_data.labels)
# Map every block number to the block types it contains, then keep blocks whose type mentions `stim`.
blocks = {idx: list(np.unique(label_df[label_df['block_number'] == idx]['block_type'])) for idx in block_idx}
blocks_with_stim = [idx for idx, vals in blocks.items() if any(stim in val for val in vals)]

# Recording metadata; loop-invariant, so assigned once instead of on every iteration.
fs = single_patient_data.fs
time_ms = single_patient_data.time_ms
channel_names = single_patient_data.channel_names
file_name = single_patient_data.file_name

# Trials used for training: answered correctly and not a control ('ctl') stimulus.
valid_trials = (label_df['is_correct'] == True) & (label_df['stim'] != 'ctl')

for block in blocks_with_stim:
    print(f"============ Block {block} {blocks[block]} ===============")
    block_index = (label_df['block_number'] == block) & valid_trials
    labels = {key: label_df[key][block_index].to_list() for key in label_df.columns.to_list()}
    single_channel_feature_df = training_features_df[block_index.values].copy()

    # SVC was the last of three consecutive assignments and therefore the only model
    # ever used; the overwritten XGBoost and Lasso instances are dropped.
    # train_kfold requires the estimator: the original call omitted it and raised TypeError.
    model = SVC()
    train_kfold(X=single_channel_feature_df, y=np.array(labels['is_experienced']), model=model)

print(f"============ Combined Blocks ===============")
block_index = label_df['block_number'].isin(blocks_with_stim) & valid_trials
labels = {key: label_df[key][block_index].to_list() for key in label_df.columns.to_list()}
single_channel_feature_df = training_features_df[block_index.values].copy()

model = SVC()
train_kfold(X=single_channel_feature_df, y=np.array(labels['is_experienced']), model=model)

# Step 3: find p-values

In [None]:
from scipy.stats import ttest_ind
from matplotlib.patches import Patch

channel_idx = 2
stim = 'w'

channel_name = single_patient_data.channel_names[channel_idx]
block_idx = np.unique(single_patient_data.labels['block_number'])
label_df = pd.DataFrame(single_patient_data.labels)

# specify blocks with a stim 'w' or 'i'
blocks = {idx: list(np.unique(label_df[label_df['block_number'] == idx]['block_type'])) for idx in block_idx}
blocks_with_stim = [idx for idx, vals in blocks.items() if any(stim in val for val in vals)]

# select specific channel: one column per time-window feature, one row per trial
time_feature_df = pd.DataFrame({key: time_features[key][:, channel_idx] for key in time_features.keys()})

# Welch t-test (experienced vs. not experienced) for every block and feature.
results = []
for block in blocks_with_stim:
    block_data = label_df[label_df['block_number'] == block]
    block_data = block_data[block_data['is_correct'].astype(bool) & (block_data['stim'] != 'ctl')]
    # Explicit bool cast so that `~` is a logical NOT even if the column has object dtype.
    experienced = block_data['is_experienced'].astype(bool)
    block_features = time_feature_df.loc[block_data.index]

    for feature_name, values in block_features.items():
        t_stat, p_val = ttest_ind(values[experienced].dropna(), values[~experienced].dropna(), equal_var=False)
        results.append({
            'Block': f'Block {block}',
            'Feature': feature_name,
            'P-Value': p_val,
            'T-Statistic': t_stat
        })

results_df = pd.DataFrame(results, columns=['Block', 'Feature', 'P-Value', 'T-Statistic'])

# Plotting
if results_df.empty:
    print(f"No blocks containing stim '{stim}' were found; nothing to plot.")
else:
    # One bar per (feature, block) result, grouped by feature. Each bar is coloured by the
    # sign of its own t-statistic. The previous seaborn call passed one colour per result
    # row as `palette` while `hue` was the block, so colours were assigned to blocks and
    # did not reflect the sign at all.
    plot_df = pd.concat([results_df[results_df['Feature'] == name] for name in time_feature_df.columns])
    positions = np.arange(len(plot_df))
    # A NaN t-statistic (too few trials) compares False and is drawn red.
    bar_colors = np.where(plot_df['T-Statistic'] > 0, 'blue', 'red').tolist()

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.bar(positions, plot_df['P-Value'].to_numpy(), color=bar_colors)
    ax.set_xticks(positions)
    ax.set_xticklabels([f"{feature}\n{block}" for feature, block in zip(plot_df['Feature'], plot_df['Block'])])
    ax.legend(handles=[
        Patch(color='blue', label='T-Statistic > 0 (experienced mean higher)'),
        Patch(color='red', label='T-Statistic <= 0 or undefined'),
    ])
    ax.set_title(f'P-Values by Block and Feature for {channel_name}')
    # The axis is linear, so the label no longer claims a log scale.
    ax.set_ylabel('P-Value')
    ax.set_xlabel('Feature')
    plt.show()



# Select Channel Groups
### Channel groups:

In [None]:
all_channels = single_patient_data.channel_names

# Named electrode groups, each listing seven channel labels.
# NOTE(review): 'B31' is listed in both "LAL" and "RAM" - confirm this overlap is intended.
channel_groups = {
    "CF": ['C15', 'C16', 'C17', 'C18', 'C19', 'C28', 'C29'],
    "LAL": ['B30', 'B31', 'D5', 'D6', 'D7', 'D8', 'D9'],
    "LAM": ['D2', 'D3', 'D4', 'D12', 'D13', 'C24', 'C25'],
    "RAM": ['C2', 'C3', 'C4', 'C11', 'C12', 'B31', 'B32'],
    "RAL": ['C5', 'C6', 'C7', 'C8', 'C9', 'B27', 'B28'],
    "LOT": ['A10', 'A11', 'D26', 'D27', 'D30', 'D31', 'D32'],
    "LPM": ['A5', 'A6', 'A7', 'A18', 'D16', 'D17', 'D28'],
    "RPM": ['A31', 'A32', 'B2', 'B3', 'B4', 'B18', 'B19'],
    "ROT": ['B7', 'B8', 'B10', 'B11', 'B12', 'B16', 'B17'],
    "CO": ['A14', 'A15', 'A22', 'A23', 'A24', 'A27', 'A28'],
}

# Translate channel labels into 0-based positions within `all_channels`
# (list.index raises ValueError for a label the recording does not contain).
channel_groups_indices = {}
for group, channels_name in channel_groups.items():
    channel_groups_indices[group] = [all_channels.index(channel) for channel in channels_name]

for group_name in channel_groups_indices:
    print(f"{group_name}: {channel_groups_indices[group_name]}")

## Case 1: Use the average of each group

In [None]:
# Build a reduced dataset whose "channels" are the mean signal of each electrode group.
single_patient_data_group_avg = EEGDataSet()
new_channel_names, new_data = [], []
for group_name, channel_indices in channel_groups_indices.items():
    # The indices come from list.index and are 0-based, so they address `all_channels`
    # directly. The previous `ch - 1` printed the neighbouring channel (and the last
    # channel for index 0).
    print(f"group {group_name}: {[all_channels[ch] for ch in channel_indices]}")
    # Average over the group's channels; keepdims keeps a length-1 channel axis for concatenation.
    new_data.append(np.mean(single_patient_data.data[:, np.array(channel_indices), :], axis=1, keepdims=True))
    new_channel_names.append(group_name)
new_data = np.concatenate(new_data, axis=1)

single_patient_data_group_avg.data = new_data
single_patient_data_group_avg.channel_names = new_channel_names
# Metadata is shared (not copied) with the source recording.
single_patient_data_group_avg.fs = single_patient_data.fs
single_patient_data_group_avg.labels = single_patient_data.labels
single_patient_data_group_avg.file_name = single_patient_data.file_name
single_patient_data_group_avg.time_ms = single_patient_data.time_ms



# Visualize the ERP in Groups

In [None]:
# Draw the block-wise ERP figure for every group-averaged channel.
for idx in range(len(new_channel_names)):
    fig, _ = visualize_block_ERP(
        single_patient_data_group_avg,
        stim='w',
        channel_idx=idx,
        axs=None,
        fig=None,
        show_plot=True,
    )

In [None]:
# Mean amplitude per trial and group inside three post-stimulus windows.
# Window bounds are in the units of `time_ms`; the end index is exclusive in the slice.
group_avg_windows = (('n200', 150, 250), ('p300', 250, 500), ('post_300', 500, 750))

time_features_group_avg = {}
for feature_name, window_start, window_end in group_avg_windows:
    start_index, end_index = get_time_idx(single_patient_data_group_avg.time_ms, window_start, window_end)
    window_samples = single_patient_data_group_avg.data[:, :, start_index:end_index]
    time_features_group_avg[feature_name] = np.mean(window_samples, axis=-1)

In [None]:
# Show the per-block distribution of the window features for every group-averaged channel.
for idx in range(len(single_patient_data_group_avg.channel_names)):
    visualize_block_features(
        single_patient_data_group_avg,
        time_features_group_avg,
        channel_idx=idx,
        stim='w',
        fig=None,
        axs=None,
        show_plot=True,
    )

# Train Model

In [None]:
# One column per (window, group) pair, named "<window> <group>"; one row per trial.
# The loop iterates the group-average feature dict itself. It previously took its keys
# from the per-channel `time_features`, which only worked because both dicts happen to
# share the same window names.
training_features_gp_avg = {}
for key, feature_matrix in time_features_group_avg.items():
    for ch_idx, ch in enumerate(single_patient_data_group_avg.channel_names):
        training_features_gp_avg[key + ' ' + ch] = feature_matrix[:, ch_idx]
training_features_df_gp_avg = pd.DataFrame(training_features_gp_avg)
training_features_df_gp_avg.head()


In [None]:
from src.experiments.utils.train_gplvm_utils import *
from sklearn.model_selection import KFold

stim = 'i'
block_idx = np.unique(single_patient_data.labels['block_number'])
label_df = pd.DataFrame(single_patient_data.labels)
# Map every block number to the block types it contains, then keep blocks whose type mentions `stim`.
blocks = {idx: list(np.unique(label_df[label_df['block_number'] == idx]['block_type'])) for idx in block_idx}
blocks_with_stim = [idx for idx, vals in blocks.items() if any(stim in val for val in vals)]

# Recording metadata; loop-invariant, so assigned once instead of on every iteration.
fs = single_patient_data.fs
time_ms = single_patient_data.time_ms
channel_names = single_patient_data.channel_names
file_name = single_patient_data.file_name

# Trials used for training: answered correctly and not a control ('ctl') stimulus.
valid_trials = (label_df['is_correct'] == True) & (label_df['stim'] != 'ctl')

# This section evaluates the group-average features, so rows are taken from
# `training_features_df_gp_avg`. The previous code indexed the per-channel
# `training_features_df`, leaving the group-average table built above unused.
# NOTE(review): confirm the group-average features are the intended input here.
for block in blocks_with_stim:
    print(f"============ Block {block} {blocks[block]} ===============")
    block_index = (label_df['block_number'] == block) & valid_trials
    labels = {key: label_df[key][block_index].to_list() for key in label_df.columns.to_list()}
    single_channel_feature_df = training_features_df_gp_avg[block_index.values].copy()

    # train_kfold requires an estimator; the original call omitted it and raised TypeError.
    # SVC matches the model the other training cells end up using.
    model = SVC()
    train_kfold(X=single_channel_feature_df, y=np.array(labels['is_experienced']), model=model)

print(f"============ Combined Blocks ===============")
block_index = label_df['block_number'].isin(blocks_with_stim) & valid_trials
labels = {key: label_df[key][block_index].to_list() for key in label_df.columns.to_list()}
single_channel_feature_df = training_features_df_gp_avg[block_index.values].copy()

model = SVC()
train_kfold(X=single_channel_feature_df, y=np.array(labels['is_experienced']), model=model)

# Case 2: Use the eigenvector

The 128 Hz sampling rate is higher than this analysis needs, so we work at a lower rate: the data are low-pass filtered with a cutoff frequency of 15 Hz and then downsampled to 30 Hz.

In [None]:
from scipy.signal import resample_poly, butter, filtfilt

def resample(single_patient_data, f_resample, anti_alias_filter=False):
    """Resample a recording to `f_resample` Hz in place and return it.

    The rate change is applied as an exact rational factor up/down with
    `scipy.signal.resample_poly`. The earlier integer division `fs // f_resample`
    turned 128 Hz -> 30 Hz into a factor of 4 (i.e. 32 Hz) while still recording
    `fs = 30`.

    Args:
        single_patient_data: Object with `data` (time on the last axis), `time_ms`
            (1-D sample times) and `fs` (sampling rate). All three are overwritten.
        f_resample: Target sampling rate, in the same unit as `fs`.
        anti_alias_filter: When truthy, apply an additional zero-phase 4th-order
            Butterworth low-pass at half the target rate before resampling.

    Returns:
        The same `single_patient_data` object, updated.
    """
    from fractions import Fraction

    fs_original = float(single_patient_data.fs)
    # Smallest integer up/down pair that reproduces the requested rate ratio.
    ratio = Fraction(float(f_resample) / fs_original).limit_denominator(1000)
    up, down = ratio.numerator, ratio.denominator

    signal = single_patient_data.data
    if anti_alias_filter:
        nyquist_freq = 0.5 * fs_original
        cutoff_freq = 0.5 * f_resample
        # The filter is designed only when requested and only when it is a valid
        # low-pass, i.e. when the target rate is below the original rate.
        if cutoff_freq < nyquist_freq:
            b, a = butter(4, cutoff_freq / nyquist_freq, btype='low')
            signal = filtfilt(b, a, signal, axis=-1)

    # Resample the EEG signal
    resampled_signal = resample_poly(signal, up, down, axis=-1)

    # Rebuild the time axis analytically. Passing it through resample_poly would
    # FIR-filter the time ramp and distort the values near both ends.
    time_in = np.asarray(single_patient_data.time_ms, dtype=float)
    n_out = resampled_signal.shape[-1]
    if time_in.size == 0:
        resampled_time_ms = time_in
    else:
        step_in = (time_in[-1] - time_in[0]) / (time_in.size - 1) if time_in.size > 1 else 0.0
        resampled_time_ms = time_in[0] + np.arange(n_out) * (step_in * down / up)

    single_patient_data.data = resampled_signal
    single_patient_data.time_ms = resampled_time_ms
    single_patient_data.fs = f_resample

    return single_patient_data

def remove_baseline(single_patient_data, baseline_t_min=-300, baseline_t_max=0, normalize=True):
    """Subtract the pre-stimulus baseline mean from every trial and channel.

    Args:
        single_patient_data: Object with `data` (indexed as [trial, channel, time]) and
            `time_ms` (1-D sample times). `data` is overwritten with the result.
        baseline_t_min: Start of the baseline window, in the units of `time_ms`.
        baseline_t_max: End of the baseline window (exclusive sample index).
        normalize: When True, additionally divide each trial/channel by the 0.99
            quantile of its absolute baseline amplitude.

    Returns:
        numpy.ndarray: The baseline-corrected data array.
        NOTE(review): the sibling helpers `resample` and `low_pass_filter` return the
        dataset object instead; the array return is kept for compatibility.
    """
    import warnings

    # Determine start and end indices for the baseline time window
    idx_start = np.argmin(np.abs(single_patient_data.time_ms - baseline_t_min))
    idx_end = np.argmin(np.abs(single_patient_data.time_ms - baseline_t_max))

    if (idx_end - idx_start) <= 5:
        # Too few samples for a meaningful baseline: leave the data untouched.
        # This path previously raised UnboundLocalError.
        warnings.warn(
            "remove_baseline: baseline window has 5 or fewer samples; data left unchanged."
        )
        return single_patient_data.data

    # Calculate the baseline mean values for each trial and channel
    baseline_means = np.mean(single_patient_data.data[:, :, idx_start:idx_end], axis=2, keepdims=True)

    # Subtract the baseline mean from all time points for each trial and channel
    eeg_data_baseline_removed = single_patient_data.data - baseline_means

    if normalize is True:
        maximum = np.quantile(np.abs(eeg_data_baseline_removed[:, :, idx_start:idx_end]),
                              0.99, axis=2, keepdims=True)
        # A perfectly flat baseline gives a zero scale; leave those trial/channel pairs
        # unscaled instead of producing inf/NaN.
        maximum = np.where(maximum == 0, 1.0, maximum)
        eeg_data_baseline_removed = eeg_data_baseline_removed / maximum

    single_patient_data.data = eeg_data_baseline_removed

    return eeg_data_baseline_removed

def low_pass_filter(single_patient_data, cutoff=10, order=5):
    """Apply a zero-phase Butterworth low-pass filter to a recording in place.

    Args:
        single_patient_data: Object with `data` (filtered along its last axis) and
            `fs` (sampling rate). `data` is overwritten with the filtered signal.
        cutoff: Cutoff frequency, in the same unit as `fs`.
        order: Order of the Butterworth filter.

    Returns:
        The same `single_patient_data` object, updated.
    """
    # Normalise the cutoff by the Nyquist frequency, as scipy's digital design expects.
    nyquist = 0.5 * single_patient_data.fs
    numerator, denominator = butter(N=order, Wn=cutoff / nyquist, btype='low', analog=False)

    # filtfilt runs the filter forwards and backwards (zero phase shift).
    single_patient_data.data = filtfilt(numerator, denominator, single_patient_data.data)
    return single_patient_data

In [None]:

# Take a fresh shallow copy of the first recording, then low-pass it at 15 Hz and
# resample it to a nominal 30 Hz. Both helpers rebind attributes on the copy, so the
# entry stored in `dataset` keeps its original arrays.
single_patient_data_lp = copy.copy(dataset.all_patient_data[list(dataset.all_patient_data.keys())[0]])
print(f"Selected patient is {single_patient_data_lp.file_name.split('_')[0]}")
channels = single_patient_data_lp.channel_names


single_patient_data_lp = low_pass_filter(single_patient_data_lp, cutoff=15, order=5)
# anti_alias_filter=False: the data were already low-passed by the call above.
single_patient_data_lp = resample(single_patient_data_lp, f_resample=30, anti_alias_filter=False)
# Baseline removal is currently disabled.
# NOTE(review): remove_baseline returns the data array, not the dataset object, so
# re-enabling the line below as written would replace the dataset with an array.
# single_patient_data_lp = remove_baseline(single_patient_data_lp, baseline_t_min=-1000, baseline_t_max=0)

# Find the eigenvector

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Index of the principal component kept for every group and trial.
component = 0

# Build a reduced dataset whose "channels" are, per electrode group and trial, the selected
# principal axis (a time course) of the low-passed, resampled group signals.
single_patient_data_group_egn = EEGDataSet()
new_channel_names, new_data = [], []
for group_name, channel_indices in channel_groups_indices.items():
    # Indices are 0-based positions in `all_channels`; the previous `ch - 1` printed the wrong names.
    print(f"group {group_name}: {[all_channels[ch] for ch in channel_indices]}")
    # Use the low-passed / resampled recording. The previous code fitted PCA on the
    # unfiltered full-rate data while pairing the result with the resampled time axis.
    group = single_patient_data_lp.data[:, np.array(channel_indices), :]
    new_data_channel = []
    for trial in group:
        # Standardizing the data (rows = channels of the group, columns = time samples)
        X_scaled = StandardScaler().fit_transform(trial)

        # Fit on the standardized trial; it was computed before but never used.
        # NOTE(review): confirm standardized input is what the analysis intends.
        pca = PCA(n_components=4)
        pca.fit(X_scaled)
        new_data_channel.append(pca.components_[component])
    new_data_channel = np.stack(new_data_channel, axis=0)
    new_data.append(new_data_channel[:, np.newaxis, :])
    new_channel_names.append(group_name)
new_data = np.concatenate(new_data, axis=1)

single_patient_data_group_egn.data = new_data
single_patient_data_group_egn.channel_names = new_channel_names
# Sampling rate and time axis both come from the resampled recording so they match `data`.
single_patient_data_group_egn.fs = single_patient_data_lp.fs
single_patient_data_group_egn.labels = single_patient_data.labels
single_patient_data_group_egn.file_name = single_patient_data.file_name
single_patient_data_group_egn.time_ms = single_patient_data_lp.time_ms


In [None]:
# Draw the block-wise ERP figure for every eigenvector-based group channel.
for idx in range(len(new_channel_names)):
    fig, _ = visualize_block_ERP(
        single_patient_data_group_egn,
        stim='w',
        channel_idx=idx,
        axs=None,
        fig=None,
        show_plot=True,
    )

In [None]:
# Mean value per trial and group inside three post-stimulus windows of the eigenvector data.
# Window bounds are in the units of `time_ms`; the end index is exclusive in the slice.
group_egn_windows = (('n200', 150, 250), ('p300', 250, 500), ('post_300', 500, 750))

time_features_group_egn = {}
for feature_name, window_start, window_end in group_egn_windows:
    start_index, end_index = get_time_idx(single_patient_data_group_egn.time_ms, window_start, window_end)
    window_samples = single_patient_data_group_egn.data[:, :, start_index:end_index]
    time_features_group_egn[feature_name] = np.mean(window_samples, axis=-1)

In [None]:
# Show the per-block distribution of the window features for every eigenvector-based group channel.
for idx in range(len(single_patient_data_group_egn.channel_names)):
    visualize_block_features(
        single_patient_data_group_egn,
        time_features_group_egn,
        channel_idx=idx,
        stim='w',
        fig=None,
        axs=None,
        show_plot=True,
    )

In [None]:

# One column per (window, group) pair built from the eigenvector features of this section.
# The previous version rebuilt the raw per-channel table from `time_features`, so the
# classifier never saw the eigenvector features computed above.
# NOTE(review): confirm the eigenvector features are the intended input here.
training_features_gp_egn = {}
for key, feature_matrix in time_features_group_egn.items():
    for ch_idx, ch in enumerate(single_patient_data_group_egn.channel_names):
        training_features_gp_egn[key + ' ' + ch] = feature_matrix[:, ch_idx]
training_features_df_gp_egn = pd.DataFrame(training_features_gp_egn)

stim = 'w'
block_idx = np.unique(single_patient_data.labels['block_number'])
label_df = pd.DataFrame(single_patient_data.labels)
# Map every block number to the block types it contains, then keep blocks whose type mentions `stim`.
blocks = {idx: list(np.unique(label_df[label_df['block_number'] == idx]['block_type'])) for idx in block_idx}
blocks_with_stim = [idx for idx, vals in blocks.items() if any(stim in val for val in vals)]

# Recording metadata; loop-invariant, so assigned once instead of on every iteration.
fs = single_patient_data.fs
time_ms = single_patient_data.time_ms
channel_names = single_patient_data.channel_names
file_name = single_patient_data.file_name

# Trials used for training: answered correctly and not a control ('ctl') stimulus.
valid_trials = (label_df['is_correct'] == True) & (label_df['stim'] != 'ctl')

for block in blocks_with_stim:
    print(f"============ Block {block} {blocks[block]} ===============")
    block_index = (label_df['block_number'] == block) & valid_trials
    labels = {key: label_df[key][block_index].to_list() for key in label_df.columns.to_list()}
    single_channel_feature_df = training_features_df_gp_egn[block_index.values].copy()

    # SVC was the last of three consecutive assignments and therefore the only model
    # ever used; the overwritten XGBoost and Lasso instances are dropped.
    model = SVC()
    train_kfold(X=single_channel_feature_df, y=np.array(labels['is_experienced']), model=model)

print(f"============ Combined Blocks ===============")
block_index = label_df['block_number'].isin(blocks_with_stim) & valid_trials
labels = {key: label_df[key][block_index].to_list() for key in label_df.columns.to_list()}
single_channel_feature_df = training_features_df_gp_egn[block_index.values].copy()

model = SVC()
train_kfold(X=single_channel_feature_df, y=np.array(labels['is_experienced']), model=model)