
# Notebook #0: Preprocessing

    NOTE: The raw EEG data are not provided in this repository.
    This means you won't be able to run this notebook (0.Preprocess.ipynb),
    or the one that comes after it (1.Merge.ipynb).
    These files are provided for completeness, and in the hope they might
    be of use to other researchers.

    Instead, we've included the preprocessed, epoched, and combined data as
    - data/trial_epo.fif: Stimulus-locked epochs
    - data/response_epo.fif: Response-locked epochs
    
    To start re-running the analyses, begin with 2.ERPs.ipynb.
    

In [None]:
import os
import sys
from glob import glob

import matplotlib as mpl
import matplotlib.pyplot as plt
import mne
import numpy as np
import pandas as pd
import seaborn as sns
from mne import io

sys.path.append('src')
import eegf # My generic EEG Functions
import functions # Specific to this analysis

## Plot styling: 20pt base font, every text element at the 'medium' relative size.
mpl.rcParams['font.size'] = 20
mpl.rcParams.update({
    'axes.titlesize': 'medium',
    'axes.labelsize': 'medium',
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
    'legend.fontsize': 'medium',
    'figure.titlesize': 'medium',
    'figure.figsize': (8, 6),
})
mpl.rc('font', family='DejaVu Sans')
mpl.rcParams['svg.fonttype'] = 'none'
## Scaling factor used throughout, e.g. 600 / million as a rejection threshold.
million = 1000000.

## Path where raw data is stored.
## Should have subdirectory called bdf/
## Set the ROULETTE_EEG_DATA environment variable to point somewhere else;
## without it, the original location is used.
datapath = os.environ.get('ROULETTE_EEG_DATA',
                          '/media/eoin/Seagate Expansion Drive/RouletteEEG/data')

## Create the output folders with plain Python, so this does not depend on a
## shell `mkdir` being available (works on Windows, Python 2 and Python 3).
for _dirname in ('data', os.path.join('data', 'csv'), 'annotations'):
    if not os.path.isdir(_dirname):
        os.makedirs(_dirname)

## csv files should be copied to data/csv/
## Uncomment these lines to do it (I don't know if this works on windows)
## NOTE: shutil.copytree refuses to copy into a directory that already exists
## (before Python 3.8), and data/csv is created just above.
# import shutil
# shutil.copytree(os.path.join(datapath, 'csv'), 'data/csv')

print('Python version:')
print(sys.version)

print('MNE version:')
print(mne.__version__)



In [None]:
## Switch to the interactive Qt backend: the manual annotation step further down
## calls raw.plot(..., block=True), which opens a blocking browser window.
%matplotlib qt

## Load and prepare data

In [None]:
## Run this notebook interactively for each participant.
## `subject` is formatted with %i into every input and output file name below
## (bdf file, annotations, ICA plot, epochs, per-subject figures).
subject = 1001

In [None]:
## Make sure the folder for the behavioural csv files exists.
## os.makedirs also creates the parent 'data' folder when it is missing.
if not os.path.exists('data/csv'):
    os.makedirs('data/csv')

In [None]:
## Read this subject's BDF recording with the biosemi64 montage;
## stim_channel=-1 takes the last channel as the trigger channel.
bdf_file = os.path.join(datapath, 'bdf/%i.bdf' % subject)
raw = io.read_raw_edf(bdf_file,
                      montage=mne.channels.read_montage('biosemi64'),
                      stim_channel=-1)

## Channels 32-37 are renamed: two reference electrodes, then the four EOGs.
eogs = [u'ELL', u'ERS', u'ERL', u'ERI']
external_names = ['ML', 'MR'] + eogs
mne.rename_channels(raw.info, dict(zip(raw.ch_names[32:38], external_names)))

## Channel types: the four EOG channels, and the first 32 channels as EEG.
d = {name: 'eog' for name in eogs}
d.update({name: 'eeg' for name in raw.ch_names[:32]})
raw.set_channel_types(d)

In [None]:
## This cell takes a while.
## The steps are chained, so the order below is the order in which they are applied.
raw = (
    raw
    .load_data()
    ## Drop the two external channels that were not renamed above.
    .drop_channels(['EXG7', 'EXG8'])
    ## Re-reference to ML and MR, then drop those two channels.
    .set_eeg_reference(['ML', 'MR'])
    .drop_channels(['ML', 'MR'])
    ## Band-pass .05-50 on the first 36 channels
    ## (presumably the 32 EEG + 4 EOG channels left after the drops -- TODO confirm).
    .filter(.05, 50, picks=range(36))
    ## Resample to 125; the notch below is therefore given Fs = 125.
    .resample(125)
    ## 50 Hz notch, applied to the same 36 channels via apply_function.
    .apply_function(lambda x: mne.filter.notch_filter(x, Fs = 125, freqs=[50]), verbose=False, picks=range(36))
)

In [None]:
## To trim data, plot raw signal from FCz (channel index 15), and a line wherever
## trigger 2 (trial start) occurs.
## This will highlight any obviously faulty segments in the data.
## You can use the manual tool below to mark segments you find here.
## NOTE: t, X, decim and x are reused by the "Plot again" cell below.
t = raw.times
X = raw.get_data()
decim = 100  # plot only every 100th sample of the signal
x = X[15, ::decim]
plt.plot(t[::decim], x - x.mean(0) )

## X[-1] is the last channel (the trigger channel, see stim_channel=-1 when loading).
## Mark trial starts (trigger 2), matching the description above and the re-plot cell.
plt.vlines(t[X[-1]==2], -.0001, .0001, alpha=.5)
plt.show()

In [None]:
## Check if annotations already exist for this subject
def add_annotations(raw, starts, ends):
    """Build an ``mne.Annotations`` object labelling every span as 'bad_segment'.

    Despite the name, nothing is attached to ``raw``; it is only read for
    ``raw.info['meas_date']``, which is passed on as ``orig_time``.

    Parameters
    ----------
    raw : object with an ``info`` mapping containing 'meas_date'
    starts, ends : sequences of equal length
        Start and end of each span; durations are computed as ``ends - starts``.

    Returns
    -------
    mne.Annotations
    """
    durations = np.subtract(ends, starts)
    labels = ['bad_segment' for _ in range(len(starts))]
    return mne.Annotations(onset=starts,
                           duration=durations,
                           description=labels,
                           orig_time=raw.info['meas_date'])

def concat_annotations(list_of_annots, raw):
    """Merge several annotation objects into one via ``add_annotations``.

    ``None`` entries in ``list_of_annots`` are skipped. For every remaining
    entry, its ``onset`` values and ``onset + duration`` values are collected
    (in list order) and handed to ``add_annotations(raw, starts, ends)``.
    """
    present = [annot for annot in list_of_annots if annot is not None]
    starts = [begin for annot in present for begin in annot.onset]
    ends = [stop for annot in present for stop in annot.onset + annot.duration]
    return add_annotations(raw, starts, ends)

## Annotation file for this subject; `fn` is reused by the "Save the annotations" cell.
fn = 'annotations/%i-annot.fif' % subject
if os.path.exists(fn):
    ## Show the actual file name in the prompt (fills the %s placeholder).
    txt = 'Annotations file found at %s.\nDo you want to load it?\ny/n...' % fn
    ## raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input
    resp = ask(txt)
    if resp.strip().lower() == 'y':
        ## Combine whatever is already on `raw` with the annotations read from file.
        old_annotations = mne.read_annotations(fn)
        raw.annotations = concat_annotations([raw.annotations, old_annotations], raw)

In [None]:
## Plot again, with annotated segments in red, if any.
## You can re-run this after using the manual annotation tool below.
## Uses t, x, decim and X from the first overview plot cell.
plt.plot(t[::decim], x - x.mean(0) )
plt.vlines(t[X[-1]==2], -.0001,.0001, alpha=.5)
## raw.annotations may still be None when nothing has been marked yet
## (the save cell checks for the same case), so only shade when it exists.
if raw.annotations is not None:
    for onset, dur in zip(raw.annotations.onset, raw.annotations.duration):
        plt.fill_between([onset, onset+dur], y1=-.0005, y2=.0005, color='r', alpha=.5)
plt.show()

In [None]:
## Manual annotation browser.
## Shows 120 s of data per screen; lower `duration` to zoom in on shorter
## temporal windows, depending on data quality and requirements.
## block=True keeps the cell running until the browser window is closed.
raw.plot(
    duration=120,
    n_channels=37,
    show_options=True,
    block=True,
    event_id={'X':4},
)

In [None]:
## Save the annotations (to `fn`, the per-subject annotation file defined above).
if raw.annotations is not None:
    txt = 'Save annotations to file?\ny/n...'
    ## raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input
    resp = ask(txt)
    if resp.strip().lower() == 'y':
        raw.annotations.save(fn)
    

# ICA to remove artefacts

In [None]:
## Provisional epochs, only used to check for artefacts and to fit the ICA.
## Locked to trigger 2 (trial start), from -.5 to 3, baseline -.1 to 0,
## rejecting epochs that exceed 600 / million on EEG channels.
events = mne.find_events(raw)
event_id = {'trial_start': 2}
tmin, tmax = -.5, 3
reject = {'eeg': 600 / million}
all_event_epochs_d = {
    'events': events,
    'event_id': event_id,
    'tmin': tmin,
    'tmax': tmax,
    'baseline': (-.1, 0.),
    'reject': reject,
}
all_event_epochs = mne.Epochs(raw, **all_event_epochs_d).load_data()

In [None]:
## Plot epochs, including artefacts.
## Average of the provisional epochs (before ICA), shown as a joint plot.
all_event_epochs.average().plot_joint();

In [None]:
## Raw data at Cz
## Provisional (pre-ICA) epochs, scaled by `million`, for channel index 15.
## NOTE(review): index 15 is called FCz in the trimming cell above -- confirm which label is right.
X = all_event_epochs.get_data()
plt.figure()
functions.rawplot(X * million, all_event_epochs.times, ch=15)
plt.show()

In [None]:
## Channel-by-channel covariance (first 32 channels), computed per epoch
## with np.cov and then averaged over epochs.
## Are the early (frontal) channels high-variance?
X = all_event_epochs.get_data()[:, :32]
r = np.mean([np.cov(epoch) for epoch in X], axis=0)
plt.figure()
sns.heatmap(r, cmap='jet', vmin=0, vmax=r.max(), center=0)
plt.show()

## Trying repeated ICA

In [None]:
## Fit one ICA component per EEG channel (the first 32 channels) on the
## provisional epochs, using every 3rd sample (decim=3) and a fixed seed.
n_eeg = 32
eeg_picks = range(n_eeg)
eog_picks = mne.pick_types(raw.info, eeg=False, eog=True)
ica = mne.preprocessing.ICA(n_components=n_eeg,
                            method='fastica',
                            random_state=12345)
ica = ica.fit(all_event_epochs,
              picks=eeg_picks,
              decim=3,
              reject={'eeg': .0005})

In [None]:
## Save ica plots for posterity.
if not os.path.isdir('ica_plot'):
    os.makedirs('ica_plot')
p = ica.plot_components(inst=all_event_epochs, picks=range(32))
## Depending on the MNE version and the number of components, plot_components
## returns either one figure or a list of figures. A single figure keeps the
## original file name; several figures get a page suffix.
_figs = p if isinstance(p, (list, tuple)) else [p]
for _page, _fig in enumerate(_figs):
    _suffix = '' if len(_figs) == 1 else '_%i' % _page
    _fig.savefig('ica_plot/S%i%s.png' % (subject, _suffix))

In [None]:
## Enter the labels of the ICA components to remove here.
ica_to_remove = [
    0, 12
]
## Safety check: warn when the list equals the one entered the last time this
## cell ran in the current kernel, then remember the current list.
_previous_ica_to_remove = globals().get('old_ica_to_remove')
if _previous_ica_to_remove is not None and _previous_ica_to_remove == ica_to_remove:
    print('WARNING! ARE THOSE THE VALUES FROM THE LAST PARTICIPANT?')
old_ica_to_remove = ica_to_remove

In [None]:
## Do the removal
## Mark the chosen components for exclusion and apply the ICA to the continuous data.
## `raw` is rebound to the result, so re-running cells above this point will
## operate on the already-cleaned data.
ica.exclude = ica_to_remove
raw = ica.apply(raw)

In [None]:
## Extract the epochs again. They should be clean.
## Same window as the provisional epochs (-.5 to 3 around trigger 2), but with
## no amplitude-based rejection this time.
events = mne.find_events(raw)
event_id = {'trial_start': 2}
tmin, tmax = -.5, 3
d = {
    'events': events,
    'event_id': event_id,
    'tmin': tmin,
    'tmax': tmax,
    'baseline': (-.1, 0.),
    'reject': None,
}
trial_epochs = mne.Epochs(raw, **d).load_data()

In [None]:
## Same raw plot as before the ICA (channel index 15), now on the cleaned epochs.
plt.figure()
X = trial_epochs.get_data() * million
functions.rawplot(X, trial_epochs.times, ch=15)
plt.show()

In [None]:
## Folder for the per-subject stimulus-locked ERP figures.
## Created with plain Python instead of a shell `mkdir`, so it does not rely
## on IPython automagic or a POSIX shell.
if not os.path.isdir('subject_plots/trial_epochs/'):
    os.makedirs('subject_plots/trial_epochs/')

In [None]:
## Stimulus-locked ERP of the cleaned epochs, cropped to -.1 .. 2.
erp = trial_epochs.average().crop(-.1, 2)
## Fixed time points for the joint plot; only those greater than 0 are passed on.
times = np.array([0, .125, .3, .5])
eegf.plot_joint(erp, times[times > 0], 
                title='Stimulus-locked ERP - %i' % subject, 
                save='subject_plots/trial_epochs/%i.png' % subject);

Optionally, save the cleaned-up raw data

In [None]:
# %mkdir -p 'data/cleaned/'
# raw.save('data/cleaned/%i_raw.fif' % subject, overwrite=True)

# Get epochs

In [None]:
## List the behavioural csv file(s) for this subject, as a sanity check.
## Needs `from glob import glob` in the import cell.
## Stored under its own name so that `fn` (the annotation file path used by the
## annotation cells) is not overwritten; sorted for a stable display order.
csv_files = sorted(glob('data/csv/%i*.csv' % subject))
csv_files

In [None]:
## Behavioural data for this subject, loaded by the project helper.
## `data` is later attached to the epochs as metadata and used for the response plots.
data = functions.load_subject_csv(subject)
data.head()

In [None]:
## Output folders for the epoch files saved in the next two cells.
## Created with plain Python so this does not depend on a shell `mkdir`.
for _dirname in ('data/trial_epochs', 'data/response_epochs'):
    if not os.path.isdir(_dirname):
        os.makedirs(_dirname)

In [None]:
## Final stimulus-locked epochs: -1 to 3 around trigger 2 (trial start),
## baseline -.1 to 0, no amplitude-based rejection.
## `events` is reused by the response-locked cell that follows.
events = mne.find_events(raw)
event_id = {'trial_start': 2}
tmin, tmax = -1, 3
d = {
    'events': events,
    'event_id': event_id,
    'tmin': tmin,
    'tmax': tmax,
    'baseline': (-.1, 0.),
    'reject': None,
}
trial_epochs = mne.Epochs(raw, **d).load_data()
## Attach the behavioural rows as metadata (project helper), then write to disk.
trial_epochs.metadata = functions.exclude_dropped_metadata(data, trial_epochs)
trial_epochs.save('data/trial_epochs/%i_epo.fif' % subject)

In [None]:
## Response-locked epochs: -3 to .5 around trigger 3 (response),
## baseline -3 to -2.9, no amplitude-based rejection.
## Uses `events` found in the stimulus-locked cell just above.
event_id = {'response': 3}
tmin, tmax = -3, .5
d = {
    'events': events,
    'event_id': event_id,
    'tmin': tmin,
    'tmax': tmax,
    'baseline': (-3, -2.9),
    'reject': None,
}
response_epochs = mne.Epochs(raw, **d).load_data()
## Only the behavioural rows with action == 1 are matched to these epochs.
response_epochs.metadata = functions.exclude_dropped_metadata(
    data[data['action']==1],
    response_epochs,
)
response_epochs.save('data/response_epochs/%i_epo.fif' % subject)

# Single subject analyses

Create a whole load of ERP plots for this subject.

In [None]:
## Back to inline figures for the per-subject plots below
## (the Qt backend was selected near the top of the notebook).
%matplotlib inline

In [None]:
## Drop epochs exceeding 200 / million on EEG channels, for both epoch sets.
## This happens after saving, so the files on disk keep every epoch.
trial_epochs.drop_bad(
    reject={'eeg': 200/million},
)
response_epochs.drop_bad(
    reject={'eeg': 200/million},
)

In [None]:

def do_contrast(epochs, qs, labs, ch=15, lp=10):
    """Plot one mean +/- SEM trace per condition for a single channel.

    Parameters
    ----------
    epochs : epochs object that supports selection by query string
    qs : list of str
        One selection query per condition (used as ``epochs[q]``).
    labs : list of str
        Legend label for each query, in the same order as ``qs``.
    ch : int
        Index of the channel to plot (second axis of ``get_data()``).
    lp : float
        Upper frequency passed to ``filter(None, lp)`` for each selection.
    """
    for query, label in zip(qs, labs):
        subset = epochs[query].filter(None, lp)
        channel_data = subset.get_data()[:, ch]
        eegf.plot_mean_sem(channel_data, subset.times, label=label)
    plt.legend()
    eegf.flipy()
    plt.show()

In [None]:
## Output folders for all per-subject figures, created with the eegf helper.
for _plot_dir in (
    'subject_plots/responses/',
    'subject_plots/trial_epochs/',
    'subject_plots/trial_epochs_csd/',
    'subject_plots/response_epochs/',
    'subject_plots/response_epochs_csd/',
    'subject_plots/trial_epochs_csd_img/',
    'subject_plots/response_epochs_csd_img/',
):
    eegf.mkdir(_plot_dir)

In [None]:
## Mean 'response' for every v_win x p_win combination, visible trials only.
## 'response' is selected before averaging, so other (possibly non-numeric)
## columns of `data` are never aggregated and cannot break the groupby mean.
r = (
    data[data['visible']==1]
    .groupby(['v_win', 'p_win'])['response']
    .mean()
    .reset_index()
)
rp = r.pivot_table('response', 'v_win', 'p_win')
sns.heatmap(rp, cmap='seismic')
eegf.flipy()
plt.savefig('subject_plots/responses/%i.png' % subject)
plt.show()

In [None]:
## How often do they bet on guess trials?
## Mean of 'response' over the rows where visible == 0.
data.loc[data['visible']==0, 'response'].mean()

In [None]:
## Stimulus-locked ERP after dropping bad epochs, cropped to -.1 .. 2.
erp = trial_epochs.average().crop(-.1, 2)
## Time points come from the project helper get_gfp_peaks (called with lp=3);
## only those greater than 0 are plotted.
times = functions.get_gfp_peaks(erp, lp=3)
## NOTE: same file name as the earlier stimulus-locked plot, which is overwritten.
eegf.plot_joint(erp, times[times > 0], 
                title='Stimulus-locked ERP - %i' % subject, 
                save='subject_plots/trial_epochs/%i.png' % subject);

In [None]:
## Surface-Laplacian version of the stimulus-locked epochs (eegf helper, m=5).
## The "CSD" plots below are all based on this.
trial_epochs_csd = eegf.surface_laplacian(trial_epochs, m=5)

In [None]:
## Same joint plot as for the stimulus-locked ERP, on the surface-Laplacian epochs.
erp = trial_epochs_csd.average().crop(-.1, 2)
## Time points from get_gfp_peaks (lp=3); only those greater than 0 are plotted.
times = functions.get_gfp_peaks(erp, lp=3)
eegf.plot_joint(erp, times[times > 0], 
                title='CSD Stimulus-locked ERP - %i' % subject, 
                save='subject_plots/trial_epochs_csd/%i.png' % subject);

In [None]:
## Single-trial image of the stimulus-locked CSD epochs at channel index 15,
## sorted by reaction time, with the reaction time overlaid on each trial.
epochs = trial_epochs_csd#['action==1']
## Sort on the plain NumPy values: argsort on a pandas Series reports -1 for
## missing reaction times, which is not a valid sort position.
rt_order = np.argsort(epochs.metadata['rt'].values)
fig = plt.figure(figsize=(12, 8))
g = mne.viz.plot_epochs_image(epochs, 15, 
                              fig=fig,
                              #vmin=-100, vmax=100,
                              order=rt_order,
                              overlay_times=epochs.metadata['rt']);
## These are stimulus-locked epochs, so the image goes into the trial_epochs
## folder (created above) rather than the response_epochs one.
g[0].savefig('subject_plots/trial_epochs_csd_img/%i.png' % subject)

In [None]:
## Stimulus-locked contrast by trial type, at do_contrast's default channel (index 15).
## Each query in `qs` is paired with the legend label at the same position in `labs`.
qs = ['visible==1 & difficult==0', 'visible==1 & difficult==1', 'visible==0']
labs = ['Easy', 'Hard', 'Guess']
do_contrast(trial_epochs_csd, qs, labs)

In [None]:
## Stimulus-locked contrast: trials with action == 0 ('Wait') vs action == 1 ('Act').
do_contrast(trial_epochs_csd, ['action==0', 'action==1'], ['Wait', 'Act'])

In [None]:
## Stimulus-locked contrast: response == 0 ('Pass') vs response == 1 ('Bet').
do_contrast(trial_epochs_csd, ['response==0', 'response==1'], ['Pass', 'Bet'])

In [None]:
## Response-locked ERP: cropped to -2 .. .1 and re-baselined to -2 .. -1.75.
erp = response_epochs.average().crop(-2, .1).apply_baseline((-2, -1.75))
## Joint plot at -.1 and 0 relative to the response.
eegf.plot_joint(erp, [-.1, 0], 
                title='Response-locked ERP - %i' % subject, 
                save='subject_plots/response_epochs/%i.png' % subject);

In [None]:
## Surface-Laplacian version of the response-locked epochs (eegf helper, m=5).
response_epochs_csd = eegf.surface_laplacian(response_epochs, m=5)

In [None]:
## Same response-locked plot (crop -2 .. .1, baseline -2 .. -1.75) on the CSD epochs.
erp = response_epochs_csd.average().crop(-2, .1).apply_baseline((-2, -1.75))
eegf.plot_joint(erp, [-.1, 0], 
                title='CSD Response-locked ERP - %i' % subject, 
                save='subject_plots/response_epochs_csd/%i.png' % subject);

In [None]:
## Single-trial image of the response-locked CSD epochs at channel index 9,
## sorted by reaction time. The overlay is -rt: with time 0 at the response,
## that is where the trial started.
## Sort on the plain NumPy values: argsort on a pandas Series reports -1 for
## missing reaction times, which is not a valid sort position.
rt_order = np.argsort(response_epochs_csd.metadata['rt'].values)
mne.viz.plot_epochs_image(response_epochs_csd, 9, 
                          #vmin=-100, vmax=100,
                          order=rt_order,
                          overlay_times=-1*response_epochs_csd.metadata['rt'])

In [None]:
## Response-locked contrast by trial type, at channel index 9.
## Each query in `qs` is paired with the legend label at the same position in `labs`.
qs = ['visible==1 & difficult==0', 'visible==1 & difficult==1', 'visible==0']
labs = ['Easy', 'Hard', 'Guess']
do_contrast(response_epochs_csd, qs, labs, ch=9)

In [None]:
## Every response-locked CSD trial at channel index 9 (filtered with
## filter(None, 10) on a copy, scaled by `million`) in faint blue,
## with the across-trial mean on top in red.
t = response_epochs_csd.times
X = response_epochs_csd.copy().filter(None, 10).get_data()[:, 9] * million
## One line per trial: each column of X.T is a single trial.
plt.plot(t, X.T, color='b', alpha=.2)
plt.plot(t, X.mean(0), color='r')
eegf.flipy()

# End of Preprocessing

Now go back to the top, change the value of `subject`, and run it all again.