# Processing of Chord-Oddball data

This notebook preprocesses the raw data of the chord-oddball dataset and produces the cleaned data on which the analysis is based. A guide for artifact removal can be found at https://neuraldatascience.io/7-eeg/erp_artifacts.html.

We recommend running this code with the default environment values, which can be found in the `.env.example` file.

In [None]:
import mne
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from autoreject import AutoReject
import json
from dotenv import load_dotenv

import auc

from pyprep.find_noisy_channels import NoisyChannels

import custom.misc as misc
import custom.preprocessing as prep

matplotlib.use('Qt5Agg')

%matplotlib qt

load_dotenv()

SUBJECT = os.getenv("SUBJECT")
BIDS_ROOT = os.getenv("BIDS_ROOT")
TASK = os.getenv("TASK")
SUBJECT = os.getenv("SUBJECT")
SUPRESS_BIDS_OUTPUT = (os.getenv('SUPRESS_BIDS_OUTPUT', 'False') == 'True')
PROMPT_BADS = (os.getenv('PROMPT_BADS', 'False') == 'True')
USE_ICA_JSON = (os.getenv('USE_ICA_JSON', 'False') == 'True')
ICA_MANUAL = (os.getenv('ICA_MANUAL', 'False') == 'True')
Z_SCORE_REJECT = (os.getenv('Z_SCORE_REJECT', 'False') == 'True')
PYPREP_REJECT = (os.getenv('PYPREP_REJECT', 'False') == 'True')
AUTOREJECT = (os.getenv('AUTOREJECT', 'False') == 'True')

print(f"""
      SUBJECT: {SUBJECT}
      BIDS_ROOT: {BIDS_ROOT}
      TASK: {TASK}
      SUBJECT: {SUBJECT}
      SUPRESS_BIDS_OUTPUT: {SUPRESS_BIDS_OUTPUT}
      PROMPT_BADS: {PROMPT_BADS}
      USE_ICA_JSON: {USE_ICA_JSON}
      ICA_MANUAL: {ICA_MANUAL}
      Z_SCORE_REJECT: {Z_SCORE_REJECT}
      PYPREP_REJECT: {PYPREP_REJECT}
      AUTOREJECT: {AUTOREJECT}
      """)


In [None]:
# Reduce the BIDS EEG data: preprocess the recording of SUBJECT once and cache it
# as a FIF file; if the cached file already exists it is loaded instead.
if not os.path.isfile(f"./data/fifs/processed_{SUBJECT}_raw.fif"):
    if SUPRESS_BIDS_OUTPUT:
        with misc.suppress_stdout_stderr():
            raw, bids_path = misc.read_raw_data(SUBJECT)
    else:
        raw, bids_path = misc.read_raw_data(SUBJECT)

    # mark every channel of the recording as an EEG channel
    channel_types = {ch: 'eeg' for ch in raw.ch_names}
    raw.set_channel_types(channel_types)

    elec_data = pd.read_csv(
        f'./data/ds003570/sub-{SUBJECT}/eeg/sub-{SUBJECT}_task-AuditoryOddballChords_electrodes.tsv',
        sep='\t',
    )
    # ensure that the electrode names and positions are correctly used
    montage = mne.channels.make_dig_montage(
        ch_pos=dict(zip(elec_data['name'], elec_data[['x', 'y', 'z']].values)),
        coord_frame='head',
    )
    raw.set_montage(montage)
    blocks = prep.split_in_blocks(raw.copy())

    # previously stored bad channels are applied to the blocks; otherwise start
    # from an empty structure
    if os.path.isfile("./data/bad_channels.json"):
        with open("./data/bad_channels.json") as f:
            bads = json.load(f)
        blocks = prep.set_bad_channels_from_json(blocks, bads)
    else:
        bads = misc.create_bad_json_structure()

    if os.path.isfile("./data/bad_ica_components.json"):
        with open("./data/bad_ica_components.json") as f:
            ica_bads = json.load(f)
    else:
        ica_bads = misc.create_bad_json_structure()

    # deprecated, do not use
    if Z_SCORE_REJECT:
        for b in blocks:
            # reject by z-score (autoreject is more sophisticated, but only works on epochs and is really slow)
            b.info['bads'].extend(prep.mark_bad_channels_by_z_score(b, threshold=5.0, window_size=10000))
            print(f"Bad channels: {b.info['bads']}")

    if PYPREP_REJECT:
        for b in blocks:
            nc = NoisyChannels(b, random_state=42)  # fixed seed for reproducibility
            nc.find_all_bads(ransac=False)
            b.info['bads'].extend(nc.get_bads())
            print(f"Bad channels: {b.info['bads']}")

    if PROMPT_BADS:
        # blocks are stored under 1-based numbers in the JSON structure
        for block_number, b in enumerate(blocks, start=1):
            b.plot(n_channels=64)
            # block until the plot window is closed so bads can be marked by hand
            plt.show(block=True)
            bads[f"sub-{SUBJECT}"][f"{block_number}"] = b.info['bads']

    with open("./data/bad_channels.json", "w") as f:
        json.dump(bads, f)

    ica_blocks = []

    # separate preprocessing in 8 blocks as boundary events occurred 8 times in the whole recording
    for block_number, b in enumerate(blocks, start=1):
        b.interpolate_bads()
        prep_block = prep.basic_preprocessing(b.copy())

        # ICA
        ica_block = prep.get_ica(prep_block, ica_bads, f"{block_number}", montage)
        ica_blocks.append(ica_block)

    # ica_bads is written back after the loop
    # NOTE(review): presumably prep.get_ica fills it in - confirm
    with open("./data/bad_ica_components.json", "w") as f:
        json.dump(ica_bads, f)

    prep_raw = mne.concatenate_raws(ica_blocks)
    misc.save_preprocessed_data(f"./data/fifs/processed_{SUBJECT}_raw.fif", prep_raw)

else:
    prep_raw = mne.io.read_raw_fif(f"./data/fifs/processed_{SUBJECT}_raw.fif", preload=True)

prep_raw.info

## Epochs

In [None]:
def _autoreject_summary(ar, reject_log):
    """Collect the autoreject results of one epoch set in JSON-serialisable form.

    ar: the fitted AutoReject estimator.
    reject_log: the reject log returned by ``ar.fit_transform(..., return_log=True)``.
    Returns a dict with the keys 'bad_epochs', 'reject_log' and 'threshes'.
    """
    return {
        # converted to a plain list because json cannot serialise NumPy arrays
        'bad_epochs': np.asarray(reject_log.bad_epochs).tolist(),
        'reject_log': reject_log.labels.tolist(),
        # thresholds are read from the fitted estimator's `threshes_` attribute
        # NOTE(review): assumes scalar per-channel thresholds - confirm against
        # what prep.apply_autoreject_info expects
        'threshes': {ch: float(thresh) for ch, thresh in ar.threshes_.items()},
    }


raw_subselect = prep_raw.copy()
# no effect here: only the last expression of a notebook cell is displayed
raw_subselect.annotations

standard_epochs = prep.get_epochs_from_events(raw_subselect, '_S')
# deviant exemplar: a chord that is of the same function as the standard chord, but a different exemplar
# we threshold the reaction time to 200ms as faster reactions are expected to be anticipatory
exemplar_epochs = prep.get_epochs_from_events(raw_subselect, '_deviantEcorrect_E', min_reaction_s=0.2)
# deviant function: a chord that is of a whole different function than the standard chord
function_epochs = prep.get_epochs_from_events(raw_subselect, '_deviantFcorrect_F', min_reaction_s=0.2)

autoreject_file = f"./data/autoreject_info_{SUBJECT}.json"

if AUTOREJECT:
    if os.path.exists(autoreject_file):
        with open(autoreject_file, 'r') as f:
            autoreject_info = json.load(f)

        # apply autoreject results to save time if already computed earlier
        standard_epochs_clean = prep.apply_autoreject_info(standard_epochs, autoreject_info['standard'])
        exemplar_epochs_clean = prep.apply_autoreject_info(exemplar_epochs, autoreject_info['exemplar'])
        function_epochs_clean = prep.apply_autoreject_info(function_epochs, autoreject_info['function'])

    else:
        # takes a long time!
        ar_standard = AutoReject()
        ar_exemplar = AutoReject()
        ar_function = AutoReject()

        # apply autoreject (takes 3-6 min)
        autoreject_info = {}

        standard_epochs_clean, reject_log_standard = ar_standard.fit_transform(standard_epochs, return_log=True)
        autoreject_info['standard'] = _autoreject_summary(ar_standard, reject_log_standard)

        exemplar_epochs_clean, reject_log_exemplar = ar_exemplar.fit_transform(exemplar_epochs, return_log=True)
        autoreject_info['exemplar'] = _autoreject_summary(ar_exemplar, reject_log_exemplar)

        function_epochs_clean, reject_log_function = ar_function.fit_transform(function_epochs, return_log=True)
        autoreject_info['function'] = _autoreject_summary(ar_function, reject_log_function)

        # save to json per sub
        with open(autoreject_file, "w") as f:
            json.dump(autoreject_info, f, indent=4)
else:
    # autoreject disabled: continue with the unmodified epochs
    standard_epochs_clean = standard_epochs
    exemplar_epochs_clean = exemplar_epochs
    function_epochs_clean = function_epochs

## AUC ROC Curve

In [None]:
# Sliding-window AUC ROC for the current subject; the window and step sizes
# are the same as in the paper.
step = 3
window = 7

roc_exemplar = auc.generate_AUC_ROC_sliding_window(
    standard_epochs_clean.get_data(),
    exemplar_epochs_clean.get_data(),
    window,
    step,
)
roc_function = auc.generate_AUC_ROC_sliding_window(
    standard_epochs_clean.get_data(),
    function_epochs_clean.get_data(),
    window,
    step,
)

# One time stamp per window.
# NOTE(review): 128 is presumably the sampling rate in Hz and -0.4 the epoch
# start in seconds - confirm against the epoching parameters.
n_windows = len(roc_exemplar)
time = [i*step/128 -0.4 for i in range(n_windows)]

# Draw both curves, then one vertical marker per chord
for curve, curve_label in ((roc_exemplar, "exemplar"), (roc_function, "function")):
    plt.plot(time, curve, label=curve_label)
for chord_time, chord_label in ((-0.4, "Chord 1"), (0, "Chord 2"), (0.4, "Chord 3")):
    plt.axvline(chord_time, label=chord_label)
plt.ylabel("AUC-Value")
plt.xlabel("Time in seconds")
plt.legend()
plt.show()

# Store both curves side by side (first column exemplar, second column function)
auc_table = np.column_stack((roc_exemplar, roc_function))
np.savetxt(f"./data/auc/auc_roc_sl_7_3_{SUBJECT}_TEST.txt", auc_table)


## Forward Model

In [None]:
# Compute the average forward model over the significant time intervall, using the same step and window size as in the paper
step = 3
window = 7

# [36,37,38,39,40,41,42,43,44,45] are the indices of timepoints, which were found significant in statistical_analysis.ipynb
forward_model_exemplar = auc.generate_forward_model_sw(standard_epochs_clean.get_data(), exemplar_epochs_clean.get_data(), [36,37,38,39,40,41,42,43,44,45], window, step)
forward_model_function = auc.generate_forward_model_sw(standard_epochs_clean.get_data(), function_epochs_clean.get_data(), [36,37,38,39,40,41,42,43,44,45], window, step)


info = raw_subselect.info

data_exemplar = forward_model_exemplar.reshape(-1, 1)
evoked_exemplar = mne.EvokedArray(data_exemplar, info)
evoked_exemplar.plot_topomap(times=[0], time_unit='s', ch_type='eeg')
plt.show()

data_function = forward_model_function.reshape(-1, 1)
evoked_function = mne.EvokedArray(data_function, info)
evoked_function.plot_topomap(times=[0], time_unit='s', ch_type='eeg')
plt.show()

# Save data
np.savetxt(f"./data/forward_model/forward_model_7_3_{SUBJECT}_TEST.txt", np.column_stack((forward_model_exemplar, forward_model_function)))