### Lucas J. Assen
### 25-03-2025
# Section A: Transform data to the FIF format
## Loading the data in the bdf format

In [None]:
import mne
from pathlib import Path
import glob
import pyvista as pv
import numpy as np
import os

# Folders that hold the raw BDF recordings of each group; change the paths to your own.
buddhist = Path("/Users/lucas.assen/Desktop/TempData/Buddhist")
control = Path("/Users/lucas.assen/Desktop/TempData/Controls")

# Collect the BDF files of each group in alphabetical order.
buddhist_files = glob.glob(str(buddhist.joinpath("*bdf")))
buddhist_files.sort()
control_files = glob.glob(str(control.joinpath("*bdf")))
control_files.sort()

# Container for the loaded data: group name -> {participant ID -> recording}.
# It is filled by load_data_files.
D_0 = dict(Buddhists={}, Controls={})

def load_data_files(file_list, condition):
    """Read every BDF file in ``file_list`` and store it in ``D_0[condition]``.

    Each recording is keyed by its file name without the extension, which
    serves as the participant ID. The data is preloaded into memory.
    """
    for bdf_path in file_list:
        subject = Path(bdf_path).stem
        print(f"Loading {subject} ({condition})...")
        D_0[condition][subject] = mne.io.read_raw_bdf(bdf_path, preload=True)

# Load the recordings of both groups into D_0, Buddhists first.
for group_files, group_name in ((buddhist_files, "Buddhists"), (control_files, "Controls")):
    load_data_files(group_files, group_name)

## Mapping the events within the data to the participants

In [None]:
# A Buddhist participant is used as the reference recording because, as opposed to the
# controls, the stim channel of the Buddhist group contains event triggers for both
# meditation types.
raw = D_0["Buddhists"]["3"]
events = mne.find_events(raw, stim_channel="Status")
print(events)

# Trigger code -> event label, grouped by the kind of event.
event_mappings = {
    # Ratings
    1: "rating_poor",
    2: "rating_reasonable",
    3: "rating_excellent",
    # FA practice
    10: "Announcement_FA_practice",
    20: "FA_practice_start_signal",
    30: "FA_practice_stop_signal",
    40: "FA_rating_signal",
    # LK practice
    11: "Announcement_LK_practice",
    21: "LK_practice_start_signal",
    31: "LK_practice_stop_signal",
    41: "LK_rating_signal",
    # Start and stop markers of the FA and LK parts
    99: "Part_FA_start_indicator",
    100: "Part_LK_start_indicator",
    199: "Part_FA_stop_indicator",
    200: "Part_LK_stop_indicator",
    # Eyes open / closed
    101: "Eyes_open_signal",
    102: "Eyes_closed_signal",
}
# Every participant of both groups is annotated with the events found in their own stimulus
# channel. Only the event labels whose trigger code occurs in that recording are used.
for condition, participants in D_0.items():
    for participant_id, raw in participants.items():
        print(f"Processing participant {participant_id} ({condition})")

        # Read the trigger events of the participant currently being processed.
        events = mne.find_events(raw, stim_channel="Status")

        # Restrict the label table to the trigger codes present in this recording.
        unique_event_codes = np.unique(events[:, 2])
        filtered_event_mappings = {}
        for code in unique_event_codes:
            if code in event_mappings:
                filtered_event_mappings[code] = event_mappings[code]
        print(f"Filtered event mappings for {participant_id}: {filtered_event_mappings}")

        # Turn the events into annotations aligned with the recording's measurement date.
        annot_from_events = mne.annotations_from_events(
            events=events,
            event_desc=filtered_event_mappings,
            sfreq=raw.info["sfreq"],
            orig_time=raw.info["meas_date"],
        )

        # Attach the annotations to the participant's raw data.
        raw.set_annotations(annot_from_events)
        print(f"Annotations applied to {participant_id}.")

## Separating the EOG channels from the EEG channels in the data


In [4]:
# The recordings store the EOG channels as ordinary EEG channels, so they are
# explicitly re-typed as EOG here.
eog_channels = ['LH', 'RH', 'LV', 'UV']

# Apply the channel type change to every participant of both groups.
for condition in ("Buddhists", "Controls"):
    for raw in D_0[condition].values():
        raw.set_channel_types(dict.fromkeys(eog_channels, 'eog'))


## Attaching the digitisation points for the biosemi64 to the participants

In [5]:
# The BDF format does not store digitisation points. As a 10-20 biosemi64 set-up was used,
# the standard biosemi64 positions are attached. The mastoids (M1 and M2) and the nasion
# points (Nz and SNz) are non-standard additions to that set-up, so their positions are
# defined manually below.

# Locations of the additional electrodes, based on typical spots for the mastoid (M1/M2)
# and nasion (Nz/SNz) points.
custom_electrodes = {"M1": np.array([-0.08, -0.04, 0.0]),
                     "M2": np.array([0.08, -0.04, 0.0]),
                     "Nz": np.array([0.0, 0.10, 0.0]),
                     "SNz": np.array([0.0, 0.12, 0.0])}

# Load the standard Biosemi64 montage and read its channel positions through the public
# accessor (instead of the private DigMontage._get_ch_pos, which may change without notice).
biosemi_montage = mne.channels.make_standard_montage('biosemi64')
biosemi_digitisation = dict(biosemi_montage.get_positions()["ch_pos"])

# Extend the standard positions with the manually defined additional electrodes.
biosemi_digitisation.update(custom_electrodes)
# NOTE(review): the positions are declared to be in the "head" frame and no fiducials are
# passed. Check biosemi_montage.get_positions()["coord_frame"] to confirm that this matches
# the frame of the standard positions.
updated_montage = mne.channels.make_dig_montage(ch_pos=biosemi_digitisation, coord_frame="head")

# Apply the digitisation points to every participant of both groups.
for condition in ["Buddhists", "Controls"]:
    for participant_id, raw in D_0[condition].items():
        raw.set_montage(updated_montage)

## Initial filter

In [None]:
# Initial band-pass filter: high-pass at 0.1 Hz and low-pass at 40 Hz.
# The filter runs on a copy, so the recordings in D_0 stay unfiltered.
Initial_filter_dict = {}

for condition, participants in D_0.items():
    Initial_filter_dict[condition] = {}
    for participant_id, raw in participants.items():
        print(f"Filtering participant {participant_id} in condition {condition}...")
        Initial_filter = raw.copy()
        Initial_filter.filter(l_freq=0.1, h_freq=40, fir_design='firwin', phase='zero-double')
        Initial_filter_dict[condition][participant_id] = Initial_filter
print("Initial filtering complete for all participants.")

## Transforming and saving the data file format

In [None]:
# The filtered data is written in the FIF format, which is more convenient for further
# EEG processing in MNE. Each group gets its own output directory; change the paths to
# your own.
buddhist_output_dir = "/Users/lucas.assen/Desktop/Master Thesis/Data/Buddhist"
control_output_dir = "/Users/lucas.assen/Desktop/Master Thesis/Data/Controls"

for folder in (buddhist_output_dir, control_output_dir):
    os.makedirs(folder, exist_ok=True)

# Write one FIF file per participant into the directory of the participant's group.
for condition, target_dir in (("Buddhists", buddhist_output_dir),
                              ("Controls", control_output_dir)):
    for participant_id, raw in Initial_filter_dict[condition].items():
        output_file = os.path.join(target_dir, f"{participant_id}.fif")
        raw.save(output_file, overwrite=True)
        print(f"Saved {participant_id} ({condition}) as {output_file}")

# Section B: Pre-processing
## Loading back in the FIF transformed data

In [None]:
import mne
from pathlib import Path
import glob
import pyvista as pv
import numpy as np
import os
from mne.preprocessing import ICA, corrmap, create_ecg_epochs, create_eog_epochs
from mne import pick_types
from mne.preprocessing import EOGRegression

# Folders with the FIF files written in Section A; change to your own directory paths.
buddhist = Path("/Users/lucas.assen/Desktop/Master Thesis/Data/Buddhist")
control = Path("/Users/lucas.assen/Desktop/Master Thesis/Data/Controls")

# Alphabetically sorted FIF files of each group.
buddhist_fif_files, control_fif_files = (
    sorted(glob.glob(str(folder / "*.fif"))) for folder in (buddhist, control)
)

# Group name -> {participant ID -> recording}; filled by load_data_files.
D_0 = {group: {} for group in ("Buddhists", "Controls")}

def load_data_files(file_list, condition):
    """Read every FIF file in ``file_list`` and store it in ``D_0[condition]``.

    The file name without its extension is used as the participant ID.
    The data is preloaded into memory.
    """
    for fif_path in file_list:
        subject = Path(fif_path).stem
        print(f"Loading {subject} ({condition})...")
        D_0[condition][subject] = mne.io.read_raw_fif(fif_path, preload=True)

# Load the FIF recordings of both groups into D_0, Buddhists first.
for group_files, group_name in ((buddhist_fif_files, "Buddhists"),
                                (control_fif_files, "Controls")):
    load_data_files(group_files, group_name)


## Rereferencing the data

In [None]:
# Re-reference every participant's data to the average of the TP7 and TP8 channels.
for participants in D_0.values():
    for raw in participants.values():
        raw.set_eeg_reference(ref_channels=["TP7", "TP8"])

## Filter prior to the ICA per participant

In [None]:
# Pre-ICA filtering. The filter runs on copies of the raws, which are collected in a new
# dictionary. The 1 Hz high-pass removes slow drifts at low frequencies, which improves
# the ICA performance.
Initial_filter_dict = {}

for condition, participants in D_0.items():
    filtered_group = Initial_filter_dict.setdefault(condition, {})
    for participant_id, raw in participants.items():
        print(f"Filtering participant {participant_id} in condition {condition}...")
        Initial_filter = raw.copy()
        Initial_filter.filter(l_freq=1, h_freq=40, fir_design='firwin', phase='zero-double')
        filtered_group[participant_id] = Initial_filter
print("Initial filtering complete for all participants.")


## Fitting the ICA components

In [None]:
# Choose the participant the ICA is computed for by changing the entry that is taken from
# the dictionary as Filt_ICA.
Filt_ICA = Initial_filter_dict["Buddhists"]["1"]

# Fit an ICA with 15 components. The fixed random state of 1 ensures that the same
# outcome is generated each time.
ica = ICA(n_components=15, max_iter="auto", random_state=1).fit(Filt_ICA)
ica


In [None]:
# Overall variance explained by all ICA components combined, per channel type.
explained_var_ica = ica.get_explained_variance_ratio(Filt_ICA)
for channel_type in explained_var_ica:
    ratio = explained_var_ica[channel_type]
    print(f"Fraction of {channel_type} variance explained by all components: {ratio}")

# Variance explained by a single component; change `components` to any index from 0-14.
explained_var_per_comp = ica.get_explained_variance_ratio(
    Filt_ICA,
    components=[0],
    ch_type="eeg",
)
ratio_percent = round(100 * explained_var_per_comp["eeg"])
print(f"Fraction of variance in EEG signal explained by first component: {ratio_percent}%")

## Selecting the Components to be removed and applying the ICA to the original data

In [None]:
# Select the participant for which the ICA was calculated (the same D_0 entry that was used
# as Filt_ICA). The component time courses and the component topographies are plotted for
# that participant. The properties of specific components can be inspected by changing `picks`.
raw = D_0["Buddhists"]["1"]
ica.plot_sources(raw, show_scrollbars=False)
ica.plot_components()
ica.plot_properties(raw, picks=[0])

In [None]:
# After visual inspection of the components, list the indices of the components to exclude.
ica.exclude = [0, 7, 10, 12, 14]

# NOTE(review): artifact_picks is not used by any later statement in this notebook.
artifact_picks = mne.pick_types(raw.info, meg=False, eeg=True, exclude="bads")
# Plot an untouched copy first so it can be compared with the cleaned data plotted below.
raw.copy().plot()
# Remove the excluded components from `raw`. The result is not reassigned, so this assumes
# ICA.apply modifies `raw` in place; the same object is plotted and saved afterwards.
ica.apply(raw)
raw.plot()

In [None]:
# The ICA-cleaned version of the data is saved to a new folder; change the output
# directories to your own.
#
# The group and participant ID are looked up from the position of `raw` inside D_0 instead
# of raw.info['subject_info']. That entry can be None (which made the old lookup fail) and
# does not hold the file-based participant ID, so every participant would have been written
# to the same "unknown_ICA_cleaned.fif" file.
matches = [(group, pid)
           for group, participants in D_0.items()
           for pid, candidate in participants.items()
           if candidate is raw]
if not matches:
    raise RuntimeError("`raw` is not one of the recordings in D_0; "
                       "re-run the cell that selects the participant.")
condition, participant_id = matches[0]

buddhist_output_dir = "/Users/lucas.assen/Desktop/Master Thesis/Buddhists_ICA_clean"
control_output_dir = "/Users/lucas.assen/Desktop/Master Thesis/Controls_ICA_clean"

# The output directory follows the participant's group, so it cannot get out of sync with
# the participant selected above. It is created first because raw.save needs it to exist.
output_dir = buddhist_output_dir if condition == "Buddhists" else control_output_dir
os.makedirs(output_dir, exist_ok=True)

output_file = os.path.join(output_dir, f"{participant_id}_ICA_cleaned.fif")
raw.save(output_file, overwrite=True)

# Section C: Final Filtering
## Loading the data

In [None]:
# For the final filtering the data ica cleaned data is loaded. Change the directory paths to your own.

import mne
from pathlib import Path
import glob
import pyvista as pv
import numpy as np
import os
from mne.preprocessing import ICA, corrmap, create_ecg_epochs, create_eog_epochs
from mne import pick_types
from mne.preprocessing import EOGRegression

# Folders holding the ICA-cleaned FIF files of each group.
buddhist = Path("/Users/lucas.assen/Desktop/Master Thesis/Buddhists_ICA_clean")
control = Path("/Users/lucas.assen/Desktop/Master Thesis/Controls_ICA_clean")

# Alphabetically sorted FIF files per group.
fif_pattern = "*.fif"
buddhist_fif_files = sorted(glob.glob(str(buddhist / fif_pattern)))
control_fif_files = sorted(glob.glob(str(control / fif_pattern)))

# Group name -> {participant ID -> recording}; filled by load_data_files.
D_0 = {}
D_0["Buddhists"] = {}
D_0["Controls"] = {}

def load_data_files(file_list, condition):
    """Read every FIF file in ``file_list`` and store it in ``D_0[condition]``.

    Recordings are keyed by the file name without its extension and are
    preloaded into memory.
    """
    for fif_path in file_list:
        subject = Path(fif_path).stem
        print(f"Loading {subject} ({condition})")
        D_0[condition][subject] = mne.io.read_raw_fif(fif_path, preload=True)

# Load the ICA-cleaned recordings of both groups into D_0, Buddhists first.
for group_files, group_name in ((buddhist_fif_files, "Buddhists"),
                                (control_fif_files, "Controls")):
    load_data_files(group_files, group_name)

## Applying the final filter and saving the data

In [None]:
# Final filter: high-pass at 0.3 Hz and low-pass at 35 Hz. It runs on copies of the
# ICA-cleaned raws, which are stored in a new dictionary. Change the directory paths to
# your own.
Preprocessed_dict = {}
buddhist_output_dir = "/Users/lucas.assen/Desktop/Master Thesis/Buddhists_preprocessed"
control_output_dir = "/Users/lucas.assen/Desktop/Master Thesis/Controls_preprocessed"

for condition, participants in D_0.items():
    # One sub-dictionary per condition.
    Preprocessed_dict[condition] = {}
    for participant_id, raw in participants.items():
        print(f"Filtering participant {participant_id} in condition {condition}")
        Initial_filter = raw.copy()
        Initial_filter.filter(l_freq=0.3, h_freq=35, fir_design='firwin', phase='zero-double')
        Preprocessed_dict[condition][participant_id] = Initial_filter

In [None]:
# Following the final filtering, the ICA-cleaned and preprocessed data is saved to a new
# folder per group.
output_dirs = {"Buddhists": buddhist_output_dir, "Controls": control_output_dir}

for condition, output_dir in output_dirs.items():
    # Create the folder first: raw.save fails when the target directory does not exist.
    os.makedirs(output_dir, exist_ok=True)
    for participant_id, raw in Preprocessed_dict[condition].items():
        output_file = os.path.join(output_dir, f"{participant_id}_processed.fif")
        raw.save(output_file, overwrite=True)
        print(f"Saved {participant_id} ({condition}) as {output_file}")

# Section D: Improved bad channel detection
## Loading the data

In [None]:
# The PREP pipeline bad channel detection methods are used to identify any remaining bad channels in the preprocessed data.
# First the data is loaded from the preprocessed folders.

import mne 
import pandas as pd
from pathlib import Path
import glob
import pyvista as pv
import numpy as np
import os
import re
import gc
from scipy import interpolate
import matplotlib.pyplot as plt
import os, warnings, numpy as np, pandas as pd, mne
from pyprep import NoisyChannels

# Folders holding the preprocessed FIF files of each group; change the paths to your own.
buddhist = Path("/Users/lucas.assen/Desktop/Master Thesis/Buddhists_preprocessed")
control = Path("/Users/lucas.assen/Desktop/Master Thesis/Controls_preprocessed")

# Alphabetically sorted FIF files per group.
buddhist_fif_files = glob.glob(str(buddhist / "*.fif"))
control_fif_files = glob.glob(str(control / "*.fif"))
buddhist_fif_files.sort()
control_fif_files.sort()

# Group name -> {participant ID -> recording}; filled by load_data_files.
D_0 = dict(Buddhists={}, Controls={})

def load_data_files(file_list, condition):
    """Read every FIF file in ``file_list`` and store it in ``D_0[condition]``.

    The participant ID used as the key is the file name without its extension.
    The data is preloaded into memory.
    """
    for fif_path in file_list:
        subject = Path(fif_path).stem
        print(f"Loading {subject} ({condition})")
        D_0[condition][subject] = mne.io.read_raw_fif(fif_path, preload=True)

# Load the preprocessed recordings of both groups into D_0, Buddhists first.
for group_files, group_name in ((buddhist_fif_files, "Buddhists"),
                                (control_fif_files, "Controls")):
    load_data_files(group_files, group_name)

## Using the noisy channel detection methods from the PREP pipeline

In [None]:
# Settings for the PREP bad channel scan.
# HP_TEMP is the cut-off frequency (Hz) of the temporary high-pass filter that scan_raw
# applies to a copy of the data before the detection runs. It is again advised to set it
# to 1 to remove slow drifts; scan_raw skips the filter when it is None.
# USE_RANSAC is passed on to NoisyChannels.find_all_bads(ransac=...).
# OUT_CSV is the path of the CSV file that lists, per participant, the channels considered
# bad according to each metric of the PREP pipeline. Change the path to your own.
# The CSV can be inspected visually to decide which bad channels to exclude from further
# analyses.
HP_TEMP = 1                   
USE_RANSAC = True
OUT_CSV = "/Users/lucas.assen/Desktop/Master Thesis/bad_channel_summary.csv"

def scan_raw(raw: mne.io.BaseRaw, hp_temp=HP_TEMP) -> dict:
    """Run the PREP noisy-channel detection on a copy of ``raw``.

    Parameters
    ----------
    raw : the recording to scan; it is copied and not modified.
    hp_temp : cut-off (Hz) of a high-pass filter applied to the copy before
        the detection; pass ``None`` to skip the filter.

    Returns
    -------
    dict with the number of bad channels per detection criterion, plus
    ``all_bad`` (number of channels returned by ``get_bads()``),
    ``bad_ch_names`` (their names, comma-separated) and ``n_total``
    (the number of channel names in ``raw``).
    """
    working_copy = raw.copy()
    if hp_temp is not None:
        working_copy.filter(l_freq=hp_temp, h_freq=None, picks="all", verbose=False)

    detector = NoisyChannels(working_copy, random_state=97, do_detrend=False)
    detector.find_all_bads(ransac=USE_RANSAC)

    # Count the flagged channels per criterion.
    stats = {}
    for criterion, channels in detector.get_bads(as_dict=True).items():
        stats[criterion] = len(channels)

    # Summary over all criteria.
    flagged = detector.get_bads()
    stats["all_bad"] = len(flagged)
    stats["bad_ch_names"] = ",".join(flagged)
    stats["n_total"] = len(raw.ch_names)
    return stats

# scan_raw is run for every participant and the results are written to a CSV file.
# The first rows of the output are shown to confirm that it looks correct.

rows = []
for group, subdict in D_0.items():
    print(f"\n=== {group} ===")
    for pid, raw in subdict.items():
        print(f"→ {pid}", end="  ")
        try:
            row = scan_raw(raw)
        except Exception as e:
            # Best effort: one failing participant must not stop the whole scan.
            print("FAILED")  # terminate the progress line started above
            warnings.warn(f"{pid} failed: {e}")
            continue
        rows.append({"participant_id": pid, "group": group, **row})
        print(f"bad={row['all_bad']} / {row['n_total']}")

summary = pd.DataFrame(rows)
# Without any successful scan the frame has no columns and sorting on them would raise a
# KeyError, so only sort when there is something to sort.
if not summary.empty:
    summary = summary.sort_values(["group", "participant_id"])
summary.to_csv(OUT_CSV, index=False)
summary.head()