## Time concatenation analysis: demo 

In [1]:
# Import libraries
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import r2_score
import mne
from mne.decoding import SPoC
mne.set_log_level(verbose='warning') #to avoid info at terminal
import pickle 
import sys
# from Utilities folder
sys.path.insert(1, './Utilities/icn_m1')
import os
sys.path.insert(1, './Utilities/')
from FilterBank import FilterBank
from ML_models import get_model

from collections import OrderedDict
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

In [2]:
# Experiment settings -- adjust them to your own setup.
# NOTE(review): 'data_path' is an absolute, machine-specific path; point it at
# the folder holding the 'ECOG' and 'STN' sub-directories on your machine.
settings = {
    'data_path': "C:/Users/Pilin/Dropbox (Brain Modulation Lab)/Experiments/CRCNS/Data/Epoched/",
    # subject identifiers; this example only uses one subject
    'num_patients': ['000'],
    # one list of session subfolders per subject; this subject only has one session
    'subfolders': [['ses-right']],
}

In [3]:
# define functions
def append_time_dim(arr, y_, time_stamps):
    """
    Stack `time_stamps` consecutive rows of `arr` side by side.

    Output row i is built from input row (time_stamps + i) followed by the
    rows preceding it, most recent first, so that each output row holds the
    current sample plus its (time_stamps - 1) predecessors. The first
    `time_stamps` entries of `y_` are dropped so that the labels stay aligned
    with the output rows.

    Parameters
    ----------
    arr : 2D array of shape (samples, features)
    y_ : array indexed along the sample axis
    time_stamps : int
        Number of consecutive samples concatenated per output row.
        (Presumably one sample is a 100 ms epoch -- TODO confirm for other
        sampling rates, as noted by the original author.)

    Returns
    -------
    time_arr : float array of shape
        (arr.shape[0] - time_stamps, time_stamps * arr.shape[1])
    y_[time_stamps:]

    @author: Timon Merk
    """
    n_samples, n_features = arr.shape[0], arr.shape[1]
    n_rows = n_samples - time_stamps
    stacked = np.zeros([n_rows, int(time_stamps * n_features)])
    for lag in range(time_stamps):
        # rows time_stamps-lag ... n_samples-lag-1 are the samples `lag` steps in the past
        first = time_stamps - lag
        stacked[:, lag * n_features:(lag + 1) * n_features] = arr[first:first + n_rows, :]
    return stacked, y_[time_stamps:]

In [4]:
# Experiment and model parameters
laterality = ["CON", "IPS"]  # movement sides (presumably contralateral / ipsilateral -- confirm)
signal = ["STN", "ECOG"]     # recording modalities the experiment iterates over

# 5-fold cross-validation without shuffling
cv = KFold(n_splits=5, shuffle=False)

# SPoC estimator, handed to the FilterBank when the experiment runs
spoc = SPoC(
    n_components=1,
    log=True,
    reg='oas',
    transform_into='average_power',
    rank='full',
)

USED_MODEL = 3  # 3 == GLM with alpha 0.5

### Load the data from both modalities.
This data was already pre-processed, following these steps:
1. Epochs of 100 ms were extracted.
2. The epoched data were band-pass filtered into 8 frequency bands ([4, 8], [8, 12], [13, 20], [20, 35], [13, 35], [60, 80], [90, 200], [60, 200])
3. The target variable was downsampled accordingly to match the 100 ms epoch length.

If you want to run this code with your own data, please make sure to arrange your data in a 4D array as follows:
(epochs, channels, samples, frequency bands)

In [5]:
# get data
s = 0 # subject index; when working with all subjects, this is a for
subfolders = settings["subfolders"][s]
ss = 0 # session index; when working with all subjects, this is a for, since there are subjects which have more than one session.

ecog_dir = os.path.join(settings['data_path'], 'ECOG')
stn_dir = os.path.join(settings['data_path'], 'STN')
list_of_files_ecog = os.listdir(ecog_dir) # list of files in the ECOG directory
list_of_files_stn = os.listdir(stn_dir)   # list of files in the STN directory

# File names start with '<MODALITY>_epochs_sub_<subject>_sess_<session>', where
# <session> is the subfolder name without its first four characters ('ses-').
run_tag = '_epochs_sub_' + settings['num_patients'][s] + '_sess_' + subfolders[ss][4:]
file_ecog = [each_file for each_file in list_of_files_ecog if each_file.startswith('ECOG' + run_tag)]
file_stn = [each_file for each_file in list_of_files_stn if each_file.startswith('STN' + run_tag)]

# Only load data from runs in which both modalities were recorded.
# A file is kept when its name, minus the first four characters, is contained
# in the name of a file of the other modality.
# os.listdir returns entries in arbitrary order, so both lists are sorted to
# make sure the i-th ECOG file is paired with the i-th STN file.
matching_stn = sorted(f for f in file_stn if any(f[4:] in xs for xs in file_ecog))
matching_ecog = sorted(f for f in file_ecog if any(f[4:] in xs for xs in file_stn))

if len(matching_ecog) != len(matching_stn):
    raise RuntimeError('Error loading data')
if not matching_ecog:
    raise FileNotFoundError(
        'No matching ECOG/STN epoch files found in ' + settings['data_path']
    )

ecog_epochs = []
stn_epochs = []
labels_con = []
labels_ips = []
for ecog_file, stn_file in zip(matching_ecog, matching_stn):
    # NOTE: pickle.load can execute arbitrary code -- only open files you trust.
    with open(os.path.join(ecog_dir, ecog_file), 'rb') as handle:
        sub_ = pickle.load(handle)
    ecog_epochs.append(sub_['epochs'])
    # the labels are read from the ECOG file only
    labels_con.append(sub_['label_con'])
    labels_ips.append(sub_['label_ips'])
    with open(os.path.join(stn_dir, stn_file), 'rb') as handle:
        sub_ = pickle.load(handle)
    stn_epochs.append(sub_['epochs'])

# stack all runs along the first (epoch) axis
X_ECOG = np.concatenate(ecog_epochs, axis=0)
X_STN = np.concatenate(stn_epochs, axis=0)
Y_con = np.concatenate(labels_con, axis=0)
Y_ips = np.concatenate(labels_ips, axis=0)

In [6]:
# Show the dimensions of both data arrays
print('The shape of the ECoG data is:\n(epochs, channels, samples, frequency bands):\n', X_ECOG.shape, sep='')
print('The shape of the STN-LFP data is:\n(epochs, channels, samples, frequency bands):\n', X_STN.shape, sep='')


The shape of the ECoG data is:
(epochs, channels, samples, frequency bands):
(2810, 6, 1001, 8)
The shape of the STN-LFP data is:
(epochs, channels, samples, frequency bands):
(2810, 3, 1001, 8)


In [7]:
# Declare the containers in which the experiment stores its results.
# For the purpose of this example we save some of them, but here you can define
# as many saving variables as you want to!
Ypre_tr = OrderedDict()      # predictions on the training data
score_tr = OrderedDict()     # scores on the training data
Ypre_te = OrderedDict()      # predictions on the test data
score_te = OrderedDict()     # scores on the test data
Label_tr = OrderedDict()     # training labels
Label_te = OrderedDict()     # test labels
# The experiment loop also fills these two; they must exist beforehand,
# otherwise running the notebook on a fresh kernel fails with a NameError.
Coef = OrderedDict()         # fitted model coefficients
hyperparams = OrderedDict()  # hyperparameters returned by the optimizer

### Run the experiment!
(and be patient, it can take some minutes!)

In [None]:
# run the experiment for every type of signal, time concatenation window and laterality
# NOTE(review): the result containers are keyed by laterality only and are
# reset for every signal and time lag, so once the loops finish they hold only
# the results of the last (signal, time lag) combination -- confirm this is
# intended, or store the results before moving on to the next combination.

# select the target by laterality name rather than by position in `laterality`
labels_by_side = {"CON": Y_con, "IPS": Y_ips}

for eeg in signal:
    X = X_ECOG if eeg == "ECOG" else X_STN
    print('RUNNIN SUBJECT_'+ settings['num_patients'][s]+ '_SESS_'+ str(subfolders[ss]) + '_SIGNAL_' + eeg)
    for t in range(1,11):  # t = number of concatenated time points
        print("time_lag %s" %t)
        for mov in laterality:
            print("training %s" %mov)
            score_tr[mov] = []
            score_te[mov] = []
            Ypre_tr[mov] = []
            Ypre_te[mov] = []
            Label_tr[mov] = []
            Label_te[mov] = []
            Coef[mov] = []
            hyperparams[mov] = []
            label = labels_by_side[mov]

            features = FilterBank(estimator=spoc)

            for train_index, test_index in cv.split(label):
                Ztr, Zte = label[train_index], label[test_index]
                # the feature extractor is fitted on the training split only
                gtr = features.fit_transform(X[train_index], Ztr)
                gte = features.transform(X[test_index])

                # concatenate the features of t consecutive time points
                dat_tr, label_tr = append_time_dim(gtr, Ztr, time_stamps=t)
                dat_te, label_te = append_time_dim(gte, Zte, time_stamps=t)

                Label_te[mov].append(label_te)
                Label_tr[mov].append(label_tr)

                # NOTE(review): get_model receives the training data before
                # standardization, while the returned model is fitted on the
                # standardized data below -- confirm this is intended.
                clf, optimizer = get_model(USED_MODEL, x=dat_tr, y=label_tr)

                # standardize with statistics from the training split only
                scaler = StandardScaler()
                dat_tr = scaler.fit_transform(dat_tr)
                dat_te = scaler.transform(dat_te)

                clf.fit(dat_tr, label_tr)

                # predict once per split and reuse the result for storing and scoring
                pred_te = clf.predict(dat_te)
                pred_tr = clf.predict(dat_tr)
                Ypre_te[mov].append(pred_te)
                Ypre_tr[mov].append(pred_tr)

                # in the sklearn implementation r2 can be negative. We set negatives r2 to zero
                score_te[mov].append(max(r2_score(label_te, pred_te), 0))
                score_tr[mov].append(max(r2_score(label_tr, pred_tr), 0))

                # the coefficients are read from `beta_` for model ids above 1, otherwise from `coef_`
                if USED_MODEL > 1:
                    Coef[mov].append(clf.beta_)
                else:
                    Coef[mov].append(clf.coef_)
                hyperparams[mov].append(optimizer['params'])

RUNNIN SUBJECT_000_SESS_ses-right_SIGNAL_STN
time_lag 1
training CON
training IPS
time_lag 2
training CON
training IPS
time_lag 3
training CON
training IPS
time_lag 4
training CON
training IPS
time_lag 5
training CON
training IPS
time_lag 6
training CON
training IPS
