# Notebook #1: Merge individual participants

    NOTE: The raw EEG data are not provided in this repository.
    This means you won't be able to run 0.Preprocess.ipynb or 1.Merge.ipynb.

    These files are provided for completeness, and in the hope they might
    be of use to other researchers.

    Instead, we've included the preprocessed, epoched, and combined data as
    - data/trial_epo.fif: Stimulus-locked epochs
    - data/response_epo.fif: Response-locked epochs
    
    To start re-running the analyses, begin with 2.ERPs.ipynb.
    
The code in this notebook merges the epoch files for each participant,
and produces some optional exploratory plots.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mne
from mne import io
import pandas as pd
import os
import matplotlib as mpl
import seaborn as sns
import sys

sys.path.append('src')
import eegf # My generic EEG Functions
import functions # Specific to this analysis

# Global matplotlib styling applied to every figure in this notebook:
# 20 pt base font, all text elements at the relative 'medium' size, and
# 8x6 default figures.
mpl.rcParams.update({
    'font.size': 20,
    'axes.titlesize': 'medium',
    'axes.labelsize': 'medium',
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
    'legend.fontsize': 'medium',
    'figure.titlesize': 'medium',
    'figure.figsize': (8, 6),
    # 'none' is the Matplotlib setting that leaves SVG text as text instead
    # of converting glyphs to paths.
    'svg.fonttype': 'none',
})
mpl.rc('font', family='DejaVu Sans')
# Scale factor used below both for the rejection threshold (120 / million)
# and for scaling plotted amplitudes (x million) -- presumably a
# volts <-> microvolts conversion; TODO confirm against the recording units.
million = 1e6

# Participant numbers 1001-1021 inclusive; 1007 is not part of the list.
subjects = [sid for sid in range(1001, 1022) if sid != 1007]

# Merge all metadata

In [None]:
# Toggle: True rebuilds the merged trial metadata from the per-subject CSVs
# and rewrites the cached file; False just reads the cached merged file.
merge_meta = False
if merge_meta:
    subject_dfs = [functions.load_subject_csv(s) for s in subjects]
    # ignore_index=True gives the merged frame a fresh 0..n-1 index, so `data`
    # is indexed identically whether it was rebuilt here or read back from
    # the CSV (which is written without its index).
    data = pd.concat(subject_dfs, ignore_index=True)
    data.to_csv('data/all_trial_metadata.csv', index=False)
else:
    data = pd.read_csv('data/all_trial_metadata.csv')

In [None]:
# Histogram of response times, restricted to trials with rt > 0
# (presumably trials without a response are coded as rt <= 0 -- TODO confirm).
response_data = data[data['rt'] > 0]
rts = response_data['rt']
rts.hist(bins=50)
plt.xlabel('RT')
# Title reports the share of these RTs that are below 3.
pct_under_3s = np.mean(rts < 3) * 100
plt.title('%.2f%% of RTs < 3 s' % pct_under_3s)

# Merge Epochs

In [None]:
def load_subj_eeg(path, subject, sfreq=125):
    """Load one participant's epochs file and resample it.

    Parameters
    ----------
    path : str
        Directory holding the per-subject ``<subject>_epo.fif`` files.
    subject : int
        Participant number; formatted with ``%i`` into the file name.
    sfreq : float, optional
        Target sampling rate handed to ``resample``. Defaults to 125, the
        value that was previously hard-coded, so existing calls are unchanged.

    Returns
    -------
    The epochs object returned by ``resample``.
    """
    fp = os.path.join(path, '%i_epo.fif' % subject)
    print('>>> Loading %s' % fp)
    # preload=True so the data are in memory before resampling.
    epochs = mne.read_epochs(fp, preload=True)
    return epochs.resample(sfreq)

def load_all_eeg(path, subjects):
    """Load each subject's epochs from `path` and join them into one object.

    Subjects are loaded one at a time with `load_subj_eeg`, in the order
    given, and the results are passed to MNE's `concatenate_epochs`.
    """
    per_subject = []
    for subject in subjects:
        per_subject.append(load_subj_eeg(path, subject))
    return mne.epochs.concatenate_epochs(per_subject)

from functions import raw_by_subject

## Stimulus (trial) locked

In [None]:
# Toggle: False reads the previously merged stimulus-locked files; True
# rebuilds them from the per-subject epoch files (needs the raw per-subject
# data under data/trial_epochs/).
merge_trial = False
if not merge_trial:
    trial_epochs = mne.read_epochs('data/trial_epo.fif')
    trial_epochs_csd = mne.read_epochs('data/trial-csd_epo.fif')
else:
    # Merge all subjects, then filter with a lower cutoff of 0.2 and no
    # upper cutoff before saving.
    trial_epochs = load_all_eeg('data/trial_epochs/', subjects).filter(.2, None)
    trial_epochs.save('data/trial_epo.fif')
    # Surface-Laplacian version of the same epochs (helper from src/eegf.py;
    # m=5 is passed through as-is).
    trial_epochs_csd = eegf.surface_laplacian(trial_epochs, m=5)
    trial_epochs_csd.save('data/trial-csd_epo.fif')

In [None]:
def plot_erp(epochs, ch=9):
    """Plot a single channel of `epochs` against time.

    The channel at index `ch` is sliced out of every epoch and handed,
    together with `epochs.times`, to `eegf.plot_mean_sem` (project helper;
    presumably draws the across-epoch mean with an SEM band), then shown.
    """
    single_channel = epochs.get_data()[:, ch]
    eegf.plot_mean_sem(single_channel, epochs.times)
    plt.show()


# Channel index 26 is the one labelled CPz in the plot titles further down.
plot_erp(trial_epochs, ch=26)

In [None]:
# Same channel (index 26) for the surface-Laplacian (CSD) epochs.
plot_erp(trial_epochs_csd, ch=26)

### Getting a feel for the data

You don't need to run any of this code, but it's useful to understand your data.
You can skip ahead to the next section, which merges the response-locked data.

In [None]:
def find_outlier_trials(epochs, thresh=120. / million):
    """Flag epochs whose peak absolute amplitude exceeds `thresh`.

    Only the first 32 channels of `epochs.get_data()` are inspected. For each
    epoch the largest absolute value across those channels and all time
    points is compared with `thresh`.

    Returns a boolean array with one entry per epoch (True = exceeds).
    """
    first_32 = epochs.get_data()[:, :32]
    peak_per_epoch = np.abs(first_32).max(axis=(1, 2))
    return peak_per_epoch > thresh

# Flag stimulus-locked epochs that exceed the amplitude threshold.
trial_rej = find_outlier_trials(trial_epochs, thresh=120./million)
# NOTE(review): `df` is the object returned by `trial_epochs.metadata`; if
# that is the underlying frame rather than a copy, adding the column below
# also adds it to the epochs' metadata -- confirm this is intended.
df = trial_epochs.metadata
df['rej'] = trial_rej
# Proportion of flagged trials per participant. Selecting 'rej' before
# averaging avoids taking the mean of every metadata column, which fails on
# non-numeric columns in recent pandas versions.
print(df.groupby('participant')['rej'].mean())
# Overall proportion of flagged trials.
print(trial_rej.mean())

In [None]:
# Drop the flagged trials from both versions of the stimulus-locked data.
# NOTE(review): trial_rej was computed on the unfiltered trial_epochs, so this
# cell is not safe to run twice -- the mask would no longer match in length.
trial_keep = ~trial_rej
trial_epochs = trial_epochs[trial_keep]
trial_epochs_csd = trial_epochs_csd[trial_keep]

In [None]:
# Per-subject view of the stimulus-locked epochs at channel index 26.
# raw_by_subject comes from the project's `functions` module; its exact
# output is not visible here -- presumably one plot/panel per participant.
raw_by_subject(trial_epochs, ch=26)

In [None]:
# Same per-subject view for the CSD (surface-Laplacian) epochs, channel 26.
raw_by_subject(trial_epochs_csd, ch=26)

In [None]:
# Per-subject view again with show_raw=False and yl=30 (presumably hides the
# single-trial traces and sets the y-axis limit -- TODO confirm in
# src/functions.py).
raw_by_subject(trial_epochs, show_raw=False, yl=30, ch=26)

In [None]:
## Correlation between channels
# For each epoch, compute the channel-by-channel correlation matrix of the
# first 32 channels (np.corrcoef treats each row, i.e. channel, as a
# variable), then average the matrices across epochs.
X = trial_epochs.get_data()[:, :32]
_r = [np.corrcoef(epoch) for epoch in X]
r = np.array(_r).mean(axis=0)
plt.figure()
sns.heatmap(r, cmap='seismic', vmin=-1, vmax=1, center=0)
plt.title('Correlations: Standard data')
plt.show()

In [None]:
# Mean across epochs of the per-epoch channel-by-channel correlation matrix
# (first 32 channels) for the CSD data.
X = trial_epochs_csd.get_data()[:, :32]
_r = [np.corrcoef(X[i]) for i in range(len(X))]
r = np.array(_r).mean(0)
plt.figure()
sns.heatmap(r, cmap='seismic', vmin=-1, vmax=1, center=0)
# The matrix comes from np.corrcoef, so it is labelled as correlations,
# matching the title of the standard-data heatmap.
plt.title('Correlations: CSD data')
plt.show()

In [None]:
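# Disabled exploratory plot, kept for reference. NOTE: `rej` is not defined in
# this notebook; the rejection mask computed above is named `trial_rej`.
# raw_by_subject(trial_epochs_csd[rej==False], show_raw=False, yl=50, ch=26)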

In [None]:
# Per-subject mean waveforms, scaled by `million` for plotting.
# trial_epochs has already had the flagged trials removed in the rejection
# cell above, so the full-length trial_rej mask must not be applied again
# here (its length no longer matches the filtered epochs).
X = functions.mean_by_subject(trial_epochs) * million
t = trial_epochs.times
plt.figure(figsize=(20, 12))
# One line per participant at channel index 26; iterate over `subjects`
# rather than a hard-coded count so the loop follows the list.
# Assumes rows of X are in the same order as `subjects` -- TODO confirm
# against functions.mean_by_subject.
for i, subject in enumerate(subjects):
    plt.plot(t, X[i, 26, :], label='S %i' % subject)
plt.legend()
eegf.flipy()
plt.title('Subject ERPs at CPz')
plt.show()
    

In [None]:
# Per-subject mean CSD waveforms, scaled by `million` for plotting.
# trial_epochs_csd has already had the flagged trials removed in the
# rejection cell above, so the full-length trial_rej mask must not be
# applied again here (its length no longer matches the filtered epochs).
X = functions.mean_by_subject(trial_epochs_csd) * million
# Take the time axis from the same object that is being plotted.
t = trial_epochs_csd.times
plt.figure(figsize=(20, 12))
# One line per participant at channel index 26; iterate over `subjects`
# rather than a hard-coded count so the loop follows the list.
# Assumes rows of X are in the same order as `subjects` -- TODO confirm
# against functions.mean_by_subject.
for i, subject in enumerate(subjects):
    plt.plot(t, X[i, 26, :], label='S %i' % subject)
plt.legend()
eegf.flipy()
plt.title('Subject CSD ERPs at CPz')
plt.show()
    

In [None]:
# Overlay the across-subject traces for the standard and CSD data at channel
# index 26. Both are drawn against the CSD object's time axis.
for lbl, epochs in (('ERP', trial_epochs), ('CSD ERP', trial_epochs_csd)):
    X = functions.mean_by_subject(epochs)[:, 26] * million
    eegf.plot_mean_sem(X, trial_epochs_csd.times, label=lbl)
plt.legend()
eegf.flipy()
plt.title('Grand ERPs at CPz (Different units)')
plt.show()

## Response-locked

In [None]:
# Toggle: False reads the previously merged response-locked files; True
# rebuilds them from the per-subject epoch files (needs the raw per-subject
# data under data/response_epochs/).
merge_response = False
if not merge_response:
    response_epochs = mne.read_epochs('data/response_epo.fif')
    response_epochs_csd = mne.read_epochs('data/response-csd_epo.fif')
else:
    # Merge all subjects, then filter with a lower cutoff of 0.2 and no
    # upper cutoff before saving.
    response_epochs = load_all_eeg('data/response_epochs/', subjects)
    response_epochs = response_epochs.filter(.2, None)
    response_epochs.save('data/response_epo.fif')
    # Surface-Laplacian version of the same epochs (helper from src/eegf.py).
    response_epochs_csd = eegf.surface_laplacian(response_epochs, m=5)
    response_epochs_csd.save('data/response-csd_epo.fif')


In [None]:
# Channel index 9 is the one labelled FCz in the plot titles further down.
plot_erp(response_epochs, ch=9)

In [None]:
# Same channel (index 9) for the surface-Laplacian (CSD) epochs.
plot_erp(response_epochs_csd, ch=9)

### Getting a feel for the data

Again, you don't need to run anything from this point on.

In [None]:
# Flag response-locked epochs that exceed the amplitude threshold.
resp_rej = find_outlier_trials(response_epochs, thresh=120./million)
# NOTE(review): `df` is the object returned by `response_epochs.metadata`; if
# that is the underlying frame rather than a copy, adding the column below
# also adds it to the epochs' metadata -- confirm this is intended.
df = response_epochs.metadata
df['rej'] = resp_rej
# Proportion of flagged epochs per participant. Selecting 'rej' before
# averaging avoids taking the mean of every metadata column, which fails on
# non-numeric columns in recent pandas versions.
print(df.groupby('participant')['rej'].mean())
# Overall proportion of flagged epochs.
print(resp_rej.mean())

In [None]:
# Drop the flagged epochs from both versions of the response-locked data.
# NOTE(review): resp_rej was computed on the unfiltered response_epochs, so
# this cell is not safe to run twice -- the mask would no longer match.
resp_keep = ~resp_rej
response_epochs = response_epochs[resp_keep]
response_epochs_csd = response_epochs_csd[resp_keep]

In [None]:
# Per-subject view of the response-locked epochs at channel index 9, with
# yl=120 (presumably the y-axis limit -- TODO confirm in src/functions.py).
raw_by_subject(response_epochs, ch=9, yl=120)

In [None]:
# Same per-subject view for the CSD (surface-Laplacian) epochs, channel 9.
raw_by_subject(response_epochs_csd, ch=9, yl=120)

In [None]:
# CSD epochs again with show_raw=False and a tighter yl=30.
# NOTE(review): the matching stimulus-locked cell passes the standard
# (non-CSD) epochs to this call -- confirm which data set is intended here.
raw_by_subject(response_epochs_csd, ch=9, yl=30, show_raw=False)

In [None]:
## Correlation between channels
# For each epoch, compute the channel-by-channel correlation matrix of the
# first 32 channels (np.corrcoef treats each row, i.e. channel, as a
# variable), then average the matrices across epochs.
X = response_epochs.get_data()[:, :32]
_r = [np.corrcoef(epoch) for epoch in X]
r = np.array(_r).mean(axis=0)
plt.figure()
sns.heatmap(r, cmap='seismic', vmin=-1, vmax=1, center=0)
plt.title('Correlations: Standard data')
plt.show()


In [None]:
# Mean across epochs of the per-epoch channel-by-channel correlation matrix
# (first 32 channels) for the response-locked CSD data.
X = response_epochs_csd.get_data()[:, :32]
_r = [np.corrcoef(X[i]) for i in range(len(X))]
r = np.array(_r).mean(0)
plt.figure()
sns.heatmap(r, cmap='seismic', vmin=-1, vmax=1, center=0)
# The matrix comes from np.corrcoef, so it is labelled as correlations,
# matching the title of the standard-data heatmap.
plt.title('Correlations: CSD data')
plt.show()

In [None]:
# Per-subject mean response-locked waveforms, scaled by `million`.
X = functions.mean_by_subject(response_epochs) * million
t = response_epochs.times
plt.figure(figsize=(20, 12))
# One line per participant at channel index 9; iterate over `subjects`
# rather than a hard-coded count so the loop follows the list.
# Assumes rows of X are in the same order as `subjects` -- TODO confirm
# against functions.mean_by_subject.
for i, subject in enumerate(subjects):
    plt.plot(t, X[i, 9, :], label='S %i' % subject)
plt.legend(loc='upper left')
eegf.flipy()
plt.title('Subject ERPs at FCz')
plt.show()


In [None]:
# Per-subject mean response-locked CSD waveforms, scaled by `million`.
X = functions.mean_by_subject(response_epochs_csd) * million
# Take the time axis from the same object that is being plotted.
t = response_epochs_csd.times
plt.figure(figsize=(20, 12))
# One line per participant at channel index 9; iterate over `subjects`
# rather than a hard-coded count so the loop follows the list.
# Assumes rows of X are in the same order as `subjects` -- TODO confirm
# against functions.mean_by_subject.
for i, subject in enumerate(subjects):
    plt.plot(t, X[i, 9, :], label='S %i' % subject)
plt.legend(loc='upper left')
eegf.flipy()
plt.title('Subject CSD ERPs at FCz')
plt.show()


In [None]:
# Overlay the across-subject traces for the standard and CSD response-locked
# data at channel index 9. Both are drawn against the CSD object's time axis.
for lbl, epochs in (('ERP', response_epochs), ('CSD ERP', response_epochs_csd)):
    X = functions.mean_by_subject(epochs)[:, 9] * million
    eegf.plot_mean_sem(X, response_epochs_csd.times, label=lbl)
plt.legend()
eegf.flipy()
plt.title('Grand ERPs at FCz (Different units)')
plt.show()