# Group Analysis

Notes
- 

In [81]:
%matplotlib inline

import os
from copy import deepcopy
from collections import Counter

import numpy as np
import matplotlib.pyplot as plt

import mne

from hypertools.tools.align import align

# Classification stuff
from sklearn import svm
from sklearn.model_selection import cross_val_score

## Settings

Note: Decode both to the stimulus, and to the response.

In [2]:
# Classification Settings
k_fold = 3

# Initialize SVM classification object
clf = svm.SVC(kernel='linear')

# Set the expected number of events per condition
expected_ev_counts = [25, 25]

# Set data size
n_epochs = 50
n_chs = 181
n_times = 1001

## Helper Functions

In [44]:
def prep_dat(dat):
    """Organize data for classification. 
    
    Parameters
    ----------
    dat : mne.Epochs object
        A subject's worth of epoched data.
    
    Returns
    -------
    labels : 1d array
        xx
    data : 3d array
        xx
    """

    # Check event codes there are, and unpack
    ev_counts = Counter(dat.events[:, 2])
    evc_a, evc_b = [str(el) for el in ev_counts.keys()]
    n_evc_a, n_evc_b = ev_counts.values()

    # Check the number of events is as expected
    if [n_evc_a, n_evc_b] != expected_ev_counts:
        raise ValueError('Number of events does not match what was expected.')
        
    # Generate labels
    labels = np.hstack([np.zeros(shape=[n_evc_a]), np.ones(shape=[n_evc_b])])
    
    # Organize data
    data = np.concatenate([dat[evc_a]._data, dat[evc_b]._data], 0)
    
    return data, labels


def extract_data(data_lst, flatten=False, transpose=False):
    """
    
    Parameters
    ----------
    data_lst : list of mne.Epochs objects
        xx

    Returns
    -------
    all_data : list of arrays
        xx
    """
    
    # Extract data matrices from MNE object
    all_data = [obj._data for obj in data_lst]
    
    # Flatten into continuous (2D) representation
    if flatten:
        all_data = [np.concatenate(dat, 1) for dat in all_data]
    
    # Transpose from [channels x features] to [features x channels]
    if flatten & transpose:
        all_data = [dat.T for dat in all_data]
    
    return all_data

    # Note: flatten & transpose both True is equivalent to:
    # all_data = [np.reshape(obj._data,
    #                        [obj._data.shape[1],
    #                         obj._data.shape[0] * obj._data.shape[2]]
    #                       ).T for obj in all_subjs]


def revert_3d(data_lst):
    """Reorganize a 2d matrix into the 3D trial structure.
    
    Parameters
    ----------
    
    Results
    -------
    
    """

    return [np.reshape(dat, [n_epochs, n_chs, n_times ]) for dat in data_lst]

## Data Organization / Loading

In [4]:
# Set data location for processed files
dat_path = '/Users/tom/Desktop/HyperEEG_Project/Data/proc/'

In [5]:
# Get list of available files
dat_files = [file for file in os.listdir(dat_path) if '.fif' in file]

In [6]:
# Load all data
all_subjs = [mne.read_epochs(os.path.join(dat_path, f_name),
                            preload=True, verbose=False) for f_name in dat_files]

# Check how many subjects there are
n_subjs = len(all_subjs)

In [12]:
# # TESTS

# # Load single subject data
dat = mne.read_epochs(os.path.join(dat_path, dat_files[0]), preload=True, verbose=False)

# Make test list of multi-subj data
all_subjs = [dat, dat, dat]
n_subjs = len(all_subjs)

## Within Subject Classification (un-aligned)

In [13]:
# Set the 

avg = np.max
#avg = np.min
#avg = np.mean
#avg = np.median

# n_times = len(dat.times)
# half_t = int(n_times/2)

In [14]:
# Organize subject data for classification
data, labels = prep_dat(dat)

# Optionally: sub-select features
data = avg(data[:, 0:128, :], 2)
#data = avg(data[:, 0:128, 0:half_t], 2)
#data = avg(data[:, 0:128, half_t:], 2)

In [15]:
# Run cross-validated classification
scores = cross_val_score(clf, data, labels, cv=k_fold)

In [16]:
# Check outcome
print('Cross-Validated prediction: {:1.2f}%'.format(np.mean(scores) * 100))

Cross-Validated prediction: 63.66%


## Between Subject Classification (un-aligned)

In [82]:
all_data, all_labels = [], []
for subj in all_subjs:
    data, labels = prep_dat(subj)
    all_data.append(data)
    all_labels.append(labels)

In [83]:
def btwn_subj_classication(all_data, all_labels):
    """Run classification between subjects."""

    scores = []
    
    for ind, subj_data, subj_labels in zip(range(len(all_data)), all_data, all_labels):

        # Take a copy of the group data, and drop held out subject
        temp_data = deepcopy(all_data)
        temp_labels = deepcopy(all_labels)
        del temp_data[ind]
        del temp_labels[ind]

        # Collapse group for training the model
        group_data = np.concatenate(temp_data, 0)
        group_labels = np.concatenate(temp_labels, 0)

        group_data = avg(group_data[:, 0:128, :], 2)

        # Train on group & classify left out subject
        clf = svm.SVC(kernel='linear')
        clf.fit(group_data, group_labels)
        scores.append(clf.score(avg(subj_data[:, 0:128, :], 2), subj_labels))

    return scores

In [84]:
#
btwn_scores = btwn_subj_classication(all_data, all_labels)
print(btwn_scores)
print(np.mean(btwn_scores))

[0.57999999999999996, 0.57999999999999996, 0.57999999999999996]
0.58


## Alignment


In [85]:
# Data organization - extract matrices, and flatten to continuous data
#  Note: this also switches orientation (takes the transpose) to match hypertools
all_data = extract_data(all_subjs, True, True)

In [87]:
# Do alignment
aligned_dat = align(all_data)
aligned_dat = [dat.T for dat in aligned_dat]
aligned_dat = revert_3d(aligned_dat)

## Between Subject Classification (aligned)

In [88]:
btwn_subj_classication(aligned_dat, all_labels)

[0.56000000000000005, 0.56000000000000005, 0.56000000000000005]

# Victory Party.