# PCA of the cleaned epoched EEG data


Ref: Hebart et al. eLife 2018;7:e32816. DOI: https://doi.org/10.7554/eLife.32816

In [1]:
import gc
import glob
import matplotlib.pyplot as plt
import mne
import numpy as np
import os
import pandas as pd
import re
from scipy.ndimage import gaussian_filter1d
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
# Set up the data source and folder structure.
datasrc = "/net/store/nbp/projects/GTI_decoding/data/pilot"

# Folder names used below; they are joined onto `datasrc` with os.path.join.
# Per-subject input example: data/pilot/01_subjects_data/P001/eeg/preprocessed/02_epoched
dir_struct = {
    'dir_subj': "01_subjects_data",
    'dir_gen': "02_general_exp_data",
    'filtered': "eeg/preprocessed/01_filtered",
    'epoched': "eeg/preprocessed/02_epoched",
    'cleaned': "eeg/preprocessed/03_cleaned",
    'pca': "analysis/pca",
    'rawepochs': "eeg/preprocessed/99_epochs_raw"
    }

# Subject folders start with "P" or "S" followed by digits (e.g. P001, S003).
# The character class must be [PS]: "[P,S]" would also accept a leading comma.
# os.listdir returns entries in arbitrary order, so sort to make the
# processing order (and the order of any collected results) reproducible.
subj_list = sorted(
    x for x in os.listdir(os.path.join(datasrc, dir_struct['dir_subj']))
    if re.match(r'[PS]\d+', x)
)

In [3]:
# Per-subject PCA of the epoched EEG data.
# For every subject: load all "*_epo.fif" files, concatenate them, reduce the
# channel dimension with PCA, save the component time courses as a new epochs
# file and collect a one-line summary string in `results`.
# NOTE(review): the input is read from dir_struct['epoched'], while the notebook
# title refers to the *cleaned* epochs -- confirm this is the intended input.
results = []

# The output folder is the same for all subjects; build and create it once.
pca_dir = os.path.join(datasrc, dir_struct['dir_subj'], dir_struct['pca'])
os.makedirs(pca_dir, exist_ok=True)

for s in subj_list:
    epoch_folder = os.path.join(datasrc, dir_struct['dir_subj'], s, dir_struct['epoched'])
    # glob returns matches in arbitrary order; sort so the concatenation order
    # (and therefore the epoch order in the saved file) is reproducible.
    epoch_files = sorted(glob.glob(os.path.join(epoch_folder, "*_epo.fif")))
    if not epoch_files:
        # Nothing to concatenate for this subject: record it and move on
        # instead of failing the whole loop.
        results.append("Subject: {} skipped: no *_epo.fif files in {}\n".format(s, epoch_folder))
        continue

    # preload=False keeps memory low while the individual files are opened.
    epoch_list = [mne.read_epochs(x, preload=False, verbose=False) for x in epoch_files]
    epoched_data = mne.concatenate_epochs(epoch_list, add_offset=False)
    del epoch_list
    gc.collect()

    X = epoched_data.get_data()
    # A float n_components (0.99) asks PCA for as many components as are
    # needed to explain 99% of the variance.
    pca = mne.decoding.UnsupervisedSpatialFilter(PCA(0.99), average=False)
    pca_data = pca.fit_transform(X)

    # Wrap the component time courses in an Epochs object again. The time axis
    # start is taken from the source epochs rather than hard-coded, so it stays
    # correct if the epoching window changes.
    pca_info = mne.create_info(pca_data.shape[1], epoched_data.info['sfreq'], ch_types='eeg')
    pca_epoch = mne.EpochsArray(pca_data,
                                info=pca_info,
                                tmin=epoched_data.tmin,
                                events=epoched_data.events,
                                event_id=epoched_data.event_id)
    pca_epoch.save(os.path.join(pca_dir, f"{s}_epo.fif"), overwrite=True)
    results.append("Subject: {} Original Channels: {} PCA Components: {}\n".format(s,X.shape[1],pca_data.shape[1]))
    

  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
143 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
288 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
288 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
240 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
179 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
288 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
288 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
240 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,
  epoched_data = mne.concatenate_epochs(epoch_list,add_offset=False)


Not setting metadata
Not setting metadata
288 matching events found
No baseline correction applied
0 projection items activated
0 bad epochs dropped
Overwriting existing file.


  pca_epoch = mne.EpochsArray(pca_data,


In [4]:
# Show the per-subject PCA summary, one entry per print call.
# Each entry already ends with a newline, so entries appear separated by a blank line.
for summary_line in results:
    print(summary_line, end="\n")

Subject: P001 Original Channels: 61 PCA Components: 36

Subject: P002 Original Channels: 61 PCA Components: 37

Subject: P003 Original Channels: 61 PCA Components: 1

Subject: P004 Original Channels: 61 PCA Components: 42

Subject: S001 Original Channels: 63 PCA Components: 2

Subject: S002 Original Channels: 63 PCA Components: 37

Subject: S003 Original Channels: 63 PCA Components: 22

Subject: S004 Original Channels: 63 PCA Components: 25

Subject: S005 Original Channels: 63 PCA Components: 42



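More than 50% of the components were removed for subjects P003, S001, S003 and S004. P003 and S001 retain only 1–2 components, which suggests that a single high-variance source (e.g. a noisy channel) dominates the variance in these recordings; these subjects should be checked before further analysis.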