In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import os
import subprocess
import mne
from pathlib import Path
import regex as re
from functools import reduce


# Data Download

In [None]:
# Run the dataset download script that lives under EEG_human/.
download_cmd = ['bash', os.path.join('EEG_human', 'ds004504-1.0.8.sh')]
# check=True raises CalledProcessError on a non-zero exit status, so a failed
# download stops the notebook here instead of surfacing later as missing files.
subprocess.run(download_cmd, check=True)


# Data Processing and EDA

In [None]:
def get_all_set(directory):
    """
    Gets a list of all .set files in a directory and its subdirectories.

    Args:
      directory: The path to the directory (a string or a path-like object).

    Returns:
      A list of ``pathlib.Path`` objects, one per ``*.set`` file found, sorted
      so that the order is identical on every run and every filesystem.
    """
    # rglob yields matches in whatever order the filesystem reports them;
    # sorting keeps everything derived from this list (column order, plot
    # order) reproducible.
    return sorted(Path(directory).rglob("*.set"))

In [None]:
# Load the eyes-closed recording of subject 001 as a quick look at one file.
sub001_set_path = os.path.join('EEG_human', 'ds004504-1.0.8', 'sub-001', 'eeg', 'sub-001_task-eyesclosed_eeg.set')
eeg_data = mne.io.read_raw_eeglab(sub001_set_path, preload=True)
# Tabular view: index by time, tag every channel with the subject ID and keep
# only the first three channels.
df = (
    eeg_data.to_data_frame()
    .set_index('time')
    .add_prefix('sub-001-')
    .iloc[:, :3]
)

In [None]:
# Participant metadata; the loop below reads its 'participant_id' and 'Group' columns.
sub_meta = pd.read_table(os.path.join('EEG_human', 'ds004504-1.0.8','participants.tsv'))
all_eeg_recordings = get_all_set(os.path.join('EEG_human', 'ds004504-1.0.8', 'derivatives'))

all_control_dfs = []
all_ad_dfs = []
all_ftd_dfs = []
# Route every recording through one lookup keyed by the 'Group' code, so each
# subject lands in exactly one list and unknown codes are reported, not ignored.
dfs_by_group = {'C': all_control_dfs, 'A': all_ad_dfs, 'F': all_ftd_dfs}

for fp in all_eeg_recordings:
    try:
        temp_eeg_data = mne.io.read_raw_eeglab(fp, preload=True)
        temp_df = temp_eeg_data.to_data_frame().set_index('time')
        # Get subject ID
        subject = re.findall(r'sub-\d{3}', str(fp))[0]
        # Get subject group (C, A, F)
        subject_group = sub_meta[sub_meta['participant_id'] == subject].iloc[0]['Group']
        # Rename columns to contain subject ID
        temp_df.columns = f'{subject}-' + temp_df.columns
        # Keep only first three recording channels
        temp_df = temp_df[temp_df.columns[:3]]
        if subject_group in dfs_by_group:
            dfs_by_group[subject_group].append(temp_df)
        else:
            print(f'Skipping {fp}: unexpected group {subject_group!r}')
    except Exception as e:
        # Best effort: one unreadable recording must not abort the whole load,
        # but the message has to say which file failed.
        print(f'Error reading {fp}: {e}')


def _merge_subject_frames(subject_dfs, group_name):
    """Outer-join a list of per-subject frames on their shared 'time' index.

    Args:
        subject_dfs: List of DataFrames indexed by 'time'.
        group_name: Human-readable group label, used only in the error message.

    Returns:
        A single DataFrame holding the columns of every input frame.

    Raises:
        ValueError: If ``subject_dfs`` is empty. ``reduce`` would otherwise fail
            with an unhelpful "empty iterable" TypeError.
    """
    if not subject_dfs:
        raise ValueError(
            f'No EEG recordings were loaded for the {group_name} group; '
            'check that the dataset was downloaded and that participants.tsv matches it.'
        )
    return reduce(lambda x, y: pd.merge(x, y, left_index=True, right_index=True, how='outer'), subject_dfs)


all_control_subjects = _merge_subject_frames(all_control_dfs, 'control')
all_ad_subjects = _merge_subject_frames(all_ad_dfs, 'AD')
all_ftd_subjects = _merge_subject_frames(all_ftd_dfs, 'FTD')
# Persist one CSV per group; the 'time' index is written out as the first column.
all_control_subjects.to_csv(os.path.join('EEG_human', 'control_eeg_all.csv'))
all_ad_subjects.to_csv(os.path.join('EEG_human', 'ad_eeg_all.csv'))
all_ftd_subjects.to_csv(os.path.join('EEG_human', 'ftd_eeg_all.csv'))

In [None]:
# Reload the per-group CSV exports from disk, in the order AD, control, FTD.
AD_EEG_Data, Control_EEG_Data, ftd_EEG_Data = (
    pd.read_csv(os.path.join("EEG_human", csv_name))
    for csv_name in ("ad_eeg_all.csv", "control_eeg_all.csv", "ftd_eeg_all.csv")
)



Preprocessing that has already been done:

This section covers only the `derivatives` folder, which holds the preprocessed data. The dataset authors applied the following preprocessing pipeline to the EEG signals:

* The signals were **re-referenced to the average value of A1-A2** (reference electrodes placed on mastoids) after applying a **Butterworth band-pass filter with a frequency range of 0.5 to 45 Hz**. 
* The signals were then subjected to the **ASR (Artifact Subspace Reconstruction) routine, an automatic artifact rejection technique that can eliminate persistent or large-amplitude artifacts, which removed bad data periods that exceeded the maximum acceptable 0.5 s window standard deviation of 17 (which is regarded as a conservative window)**. 
* **The ICA method (RunICA algorithm) was then used to convert the 19 EEG signals to 19 ICA components**. ICA components categorized as “eye artifacts” or “jaw artifacts” by the EEGLAB platform’s automatic classification method “ICLabel” were automatically excluded. 

It should be mentioned that, even though the recording was done in a resting state with the eyes closed, eye movement artifacts were still identified in certain EEG recordings. The figure below (Figure 2 in the paper cited at the end of this section) shows a snapshot of the same signal in raw form and in preprocessed form. It can be observed that severe high-frequency artifacts have been removed and baseline correction has been applied.

![Snapshot of the same EEG signal in raw form and after preprocessing](images/data_processing.JPG)

[Miltiadous, A., Tzimourta, K. D., Afrantou, T., Ioannidis, P., Grigoriadis, N., Tsalikakis, D. G., Angelidis, P., Tsipouras, M. G., Glavas, E., Giannakeas, N., & Tzallas, A. T. (2023). A Dataset of Scalp EEG Recordings of Alzheimer’s Disease, Frontotemporal Dementia and Healthy Subjects from Routine EEG. Data, 8(6), 95. https://doi.org/10.3390/data8060095](https://www.mdpi.com/2306-5729/8/6/95)

In [None]:
# Make nx3 plot
def vis_class_channel(df, group):
    """Plot the Fp1, Fp2 and F3 traces of every subject in one group.

    Builds a grid with one row per subject and one column per channel. For each
    channel, rows where 'time' or any subject's value for that channel is
    missing are dropped before plotting.

    Args:
        df: DataFrame with a 'time' column plus signal columns whose names
            contain the channel name ('Fp1', 'Fp2' or 'F3').
        group: Label of the group, used in the figure title.

    Returns:
        None. The figure is created through pyplot.

    Raises:
        ValueError: If ``df`` has no column containing 'Fp1', i.e. there is no
            subject to plot.
    """
    channels = ['Fp1', 'Fp2', 'F3']
    # The number of grid rows is taken from the Fp1 columns (one per subject).
    n_subjects = sum('Fp1' in col for col in df.columns)
    if n_subjects == 0:
        raise ValueError(f'No Fp1 columns found for {group} group; nothing to plot')
    # squeeze=False keeps temp_ax two-dimensional even for a single subject,
    # so the [row][col] indexing below always works.
    temp_fig, temp_ax = plt.subplots(
        n_subjects,
        len(channels),
        figsize=(30, 60),
        sharex=True,
        sharey=True,
        squeeze=False,
    )
    for i, channel in enumerate(channels):
        fp_cols = [col for col in df.columns if channel in col]
        temp_df = df[['time'] + fp_cols].dropna()
        for j, sub in enumerate(fp_cols):
            ax = temp_ax[j][i]
            ax.plot(temp_df['time'], temp_df[sub])
            ax.set_xlabel('Time (s)')
            ax.set_ylabel('Signal (µV)')
            ax.set_title(sub)
    # The grid shows all three channels, so the title names all of them rather
    # than whichever channel the loop visited last.
    temp_fig.suptitle(f'{", ".join(channels)} channels for {group} group')
    temp_fig.tight_layout(rect=[0, 0.03, 1, 0.98])


In [None]:
# Channel grid for the control cohort.
vis_class_channel(df=Control_EEG_Data, group='Control')

In [None]:
# Channel grid for the Alzheimer's cohort.
vis_class_channel(df=AD_EEG_Data, group='Alzheimers')


# Power Spectrum Analysis

In [None]:
# Presumably the EEG sampling rate in Hz (TODO confirm); not used in this cell.
Fs = 500
# Upper bounds applied to the 'time' column for the three analysis windows.
T20, T60, T300 = 20, 60, 300

# For each cohort, keep the rows whose 'time' is at or below each bound.
AD_EEG_Data_20, AD_EEG_Data_60, AD_EEG_Data_300 = (
    AD_EEG_Data[AD_EEG_Data['time'] <= t_max] for t_max in (T20, T60, T300)
)
Control_EEG_Data_20, Control_EEG_Data_60, Control_EEG_Data_300 = (
    Control_EEG_Data[Control_EEG_Data['time'] <= t_max] for t_max in (T20, T60, T300)
)
ftd_EEG_Data_20, ftd_EEG_Data_60, ftd_EEG_Data_300 = (
    ftd_EEG_Data[ftd_EEG_Data['time'] <= t_max] for t_max in (T20, T60, T300)
)

